taxonium-component
Version:
React component for exploring large phylogenetic trees in the browser
661 lines (660 loc) • 20.7 kB
JavaScript
import { A as J } from "./AbortablePromiseCache-CcuMrnn7.js";
import { u as V, a as K } from "./unzip-NIBF0hze.js";
import { L as B } from "./browser-BpRiKmO-.js";
import { R as A, Q as Z } from "./remoteFile-H_6BTCFF.js";
/**
 * A contiguous region of the compressed file, delimited by two virtual
 * offsets and tagged with the number of the index bin it was listed under.
 */
class D {
  /**
   * @param minv virtual offset at which the chunk starts
   * @param maxv virtual offset at which the chunk ends
   * @param bin number of the bin the chunk was listed under
   * @param fetchedSize optional explicit number of bytes to fetch
   */
  constructor(minv, maxv, bin, fetchedSize = undefined) {
    this.minv = minv;
    this.maxv = maxv;
    this.bin = bin;
    this._fetchedSize = fetchedSize;
  }

  /** Human-readable description of the chunk; also the value of toString(). */
  toUniqueString() {
    const range = `${this.minv}..${this.maxv}`;
    return `${range} (bin ${this.bin}, fetchedSize ${this.fetchedSize()})`;
  }

  toString() {
    return this.toUniqueString();
  }

  /**
   * Orders chunks by start offset, then end offset, then bin number.
   *
   * @returns negative, zero or positive number
   */
  compareTo(other) {
    const byStart = this.minv.compareTo(other.minv);
    if (byStart) {
      return byStart;
    }
    const byEnd = this.maxv.compareTo(other.maxv);
    if (byEnd) {
      return byEnd;
    }
    return this.bin - other.bin;
  }

  /**
   * Number of bytes to read for this chunk: the explicit size when one was
   * given, otherwise the distance between the two block positions plus 65536.
   */
  fetchedSize() {
    if (this._fetchedSize !== undefined) {
      return this._fetchedSize;
    }
    return this.maxv.blockPosition + 65536 - this.minv.blockPosition;
  }
}
/**
 * Shared base for the index readers in this file. Subclasses supply
 * `_parse(opts)`; this class memoizes its promise and derives helpers from it.
 */
class G {
  /**
   * @param {object} args
   * @param args.filehandle handle the index bytes are read from
   * @param {function} [args.renameRefSeqs] `string => string` applied to
   * reference sequence names; defaults to the identity function
   */
  constructor({ filehandle, renameRefSeqs = (name) => name }) {
    this.filehandle = filehandle;
    this.renameRefSeq = renameRefSeqs;
  }

  /** Resolves to the parsed index with the `indices` property left out. */
  async getMetadata(opts = {}) {
    const parsed = await this.parse(opts);
    const { indices, ...metadata } = parsed;
    return metadata;
  }

  /** Returns whichever of the two offsets compares lower (or the only one set). */
  _findFirstData(currentFirst, candidate) {
    if (!currentFirst) {
      return candidate;
    }
    return currentFirst.compareTo(candidate) > 0 ? candidate : currentFirst;
  }

  /**
   * Parses the index once and reuses the promise afterwards. A failed parse
   * clears the cached promise so that a later call starts over.
   */
  async parse(opts = {}) {
    if (!this.parseP) {
      this.parseP = this._parse(opts).catch((error) => {
        this.parseP = undefined;
        throw error;
      });
    }
    return this.parseP;
  }

  /** True when the parsed index has a bin index for the given sequence id. */
  async hasRefSeq(seqId, opts = {}) {
    const parsed = await this.parse(opts);
    const entry = parsed.indices[seqId];
    return Boolean(entry?.binIndex);
  }
}
// 2^16, and its square 2^32 (the weight of the upper 32-bit word below).
const W = 65536;
const ee = W * W;

/**
 * Reads eight bytes starting at `offset` as a little-endian unsigned integer
 * and returns it as a plain number.
 *
 * @param bytes indexable sequence of byte values
 * @param {number} [offset] index of the least significant byte
 * @returns {number}
 */
function Q(bytes, offset = 0) {
  const lowWord =
    bytes[offset] |
    (bytes[offset + 1] << 8) |
    (bytes[offset + 2] << 16) |
    (bytes[offset + 3] << 24);
  const highWord =
    bytes[offset + 4] |
    (bytes[offset + 5] << 8) |
    (bytes[offset + 6] << 16) |
    (bytes[offset + 7] << 24);
  // `>>> 0` reinterprets each signed 32-bit result as unsigned.
  return (highWord >>> 0) * ee + (lowWord >>> 0);
}
// Error type thrown by N() when DOMException is not available.
class te extends Error {}

/**
 * Throws if the given signal exists and reports `aborted`; otherwise does
 * nothing. Uses a DOMException named "AbortError" where DOMException exists,
 * and an error with code "ERR_ABORTED" elsewhere.
 *
 * @param signal optional object with an `aborted` flag
 */
function N(signal) {
  if (!signal || !signal.aborted) {
    return;
  }
  if (typeof DOMException !== "undefined") {
    throw new DOMException("aborted", "AbortError");
  }
  const error = new te("aborted");
  error.code = "ERR_ABORTED";
  throw error;
}
/**
 * Decides whether two chunks may be fetched as one: the gap between the end
 * of the first and the start of the second must be under 65000 (block
 * positions) and the combined span must stay under 5000000.
 */
function ne(chunk1, chunk2) {
  const gap = chunk2.minv.blockPosition - chunk1.maxv.blockPosition;
  if (!(gap < 65000)) {
    return false;
  }
  const combinedSpan = chunk2.maxv.blockPosition - chunk1.minv.blockPosition;
  return combinedSpan < 5000000;
}
/**
 * Sorts chunks by start offset (in place) and folds neighbours that `ne`
 * accepts into a single chunk by extending the earlier chunk's `maxv`.
 *
 * @param chunks array of chunks; returned as-is when empty
 * @param lowest optional virtual offset; chunks whose `maxv` does not compare
 * greater than it are dropped
 * @returns array of merged chunks
 */
function X(chunks, lowest) {
  if (chunks.length === 0) {
    return chunks;
  }

  chunks.sort((left, right) => {
    const blockDelta = left.minv.blockPosition - right.minv.blockPosition;
    if (blockDelta !== 0) {
      return blockDelta;
    }
    return left.minv.dataPosition - right.minv.dataPosition;
  });

  const merged = [];
  let last = null;
  for (const chunk of chunks) {
    if (lowest && !(chunk.maxv.compareTo(lowest) > 0)) {
      continue;
    }
    if (last !== null && ne(last, chunk)) {
      // Absorb into the previous chunk, growing its end if needed.
      if (chunk.maxv.compareTo(last.maxv) > 0) {
        last.maxv = chunk.maxv;
      }
      continue;
    }
    merged.push(chunk);
    last = chunk;
  }
  return merged;
}
/**
 * A two-part file position: `blockPosition` plus `dataPosition`.
 */
class z {
  constructor(blockPosition, dataPosition) {
    this.blockPosition = blockPosition;
    this.dataPosition = dataPosition;
  }

  /** Formats the offset as "blockPosition:dataPosition". */
  toString() {
    const { blockPosition, dataPosition } = this;
    return `${blockPosition}:${dataPosition}`;
  }

  /**
   * Orders by block position first, then by data position.
   *
   * @returns negative, zero or positive number
   */
  compareTo(other) {
    const blockDelta = this.blockPosition - other.blockPosition;
    if (blockDelta) {
      return blockDelta;
    }
    return this.dataPosition - other.dataPosition;
  }
}
/**
 * Builds a virtual offset from eight bytes starting at `offset`: the first
 * two bytes (little-endian) become the data position, the remaining six
 * (little-endian) become the block position.
 *
 * @param bytes indexable sequence of byte values
 * @param {number} [offset] index of the first of the eight bytes
 */
function S(bytes, offset = 0) {
  // Accumulate bytes 7..2 from most to least significant. Multiplication is
  // used rather than shifts because the value can exceed 32 bits.
  let blockPosition = 0;
  for (let i = 7; i >= 2; i -= 1) {
    blockPosition = blockPosition * 256 + bytes[offset + i];
  }
  const dataPosition = (bytes[offset + 1] << 8) | bytes[offset];
  return new z(blockPosition, dataPosition);
}
// Little-endian uint32 readings of the four magic bytes "CSI\x01" and
// "CSI\x02"; the CSI parser compares the first four index bytes against them.
const ie = 0x01495343;
const re = 0x02495343;

// Preset names selected by the low four bits of the format flags.
const se = {
  0: "generic",
  1: "SAM",
  2: "VCF",
};
/**
 * Left shift done with multiplication, so the result is not truncated to 32
 * bits the way `<<` would be.
 */
function oe(value, bits) {
  const factor = Math.pow(2, bits);
  return value * factor;
}
/**
 * Right shift done with division and floor, so values above 32 bits are
 * handled without truncation.
 */
function H(value, bits) {
  const divisor = Math.pow(2, bits);
  return Math.floor(value / divisor);
}
/**
 * Reader for CSI-format indexes. Parses the whole index into memory and
 * answers "which chunks may contain records for this region" queries.
 */
class M extends G {
  constructor(args) {
    super(args);
    // All three are overwritten from the index header by _parse().
    this.maxBinNumber = 0;
    this.depth = 0;
    this.minShift = 0;
  }

  /**
   * @param refName reference sequence name (after renaming)
   * @returns the line count stored for that reference, or -1 when the
   * reference is unknown or has no stats
   */
  async lineCount(refName, opts = {}) {
    const indexData = await this.parse(opts);
    const refId = indexData.refNameToId[refName];
    if (refId === undefined || !indexData.indices[refId]) {
      return -1;
    }
    const { stats } = indexData.indices[refId];
    return stats ? stats.lineCount : -1;
  }

  indexCov() {
    throw new Error("CSI indexes do not support indexcov");
  }

  /**
   * Decodes the auxiliary (tabix) header section.
   *
   * @param {Uint8Array} bytes uncompressed index bytes
   * @param {number} offset position of the section within `bytes`
   * @throws {Error} when the preset bits of the format flags are not known
   */
  parseAuxData(bytes, offset) {
    // Honour byteOffset/byteLength so that `bytes` may be a view into a
    // larger ArrayBuffer; subarray() below already respects the view.
    const dataView = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
    const formatFlags = dataView.getInt32(offset, true);
    const coordinateType = formatFlags & 65536 ? "zero-based-half-open" : "1-based-closed";
    const format = se[formatFlags & 15];
    if (!format) {
      throw new Error(`invalid Tabix preset format flags ${formatFlags}`);
    }
    const columnNumbers = {
      ref: dataView.getInt32(offset + 4, true),
      start: dataView.getInt32(offset + 8, true),
      end: dataView.getInt32(offset + 12, true),
    };
    const metaValue = dataView.getInt32(offset + 16, true);
    const metaChar = metaValue ? String.fromCharCode(metaValue) : null;
    const skipLines = dataView.getInt32(offset + 20, true);
    const nameSectionLength = dataView.getInt32(offset + 24, true);
    const { refIdToName, refNameToId } = this._parseNameBytes(
      bytes.subarray(offset + 28, offset + 28 + nameSectionLength),
    );
    return {
      refIdToName,
      refNameToId,
      skipLines,
      metaChar,
      columnNumbers,
      format,
      coordinateType,
    };
  }

  /**
   * Splits a block of NUL-terminated names into id->name and name->id
   * lookups, passing every name through `renameRefSeq`. Empty names still
   * consume an id.
   */
  _parseNameBytes(nameBytes) {
    let currRefId = 0;
    let currNameStart = 0;
    const refIdToName = [];
    const refNameToId = {};
    const decoder = new TextDecoder("utf8");
    for (let i = 0; i < nameBytes.length; i += 1) {
      if (!nameBytes[i]) {
        if (currNameStart < i) {
          const refName = this.renameRefSeq(decoder.decode(nameBytes.subarray(currNameStart, i)));
          refIdToName[currRefId] = refName;
          refNameToId[refName] = currRefId;
        }
        currNameStart = i + 1;
        currRefId += 1;
      }
    }
    return {
      refNameToId,
      refIdToName,
    };
  }

  // fetch and parse the index
  async _parse(opts = {}) {
    const bytes = await V(await this.filehandle.readFile(opts));
    // Honour byteOffset/byteLength so that `bytes` may be a view into a
    // larger ArrayBuffer; S() and Q() index `bytes` directly and so already
    // respect the view.
    const dataView = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);

    const magic = dataView.getUint32(0, true);
    let csiVersion;
    if (magic === ie) {
      csiVersion = 1;
    } else if (magic === re) {
      csiVersion = 2;
    } else {
      throw new Error("Not a CSI file");
    }

    this.minShift = dataView.getInt32(4, true);
    this.depth = dataView.getInt32(8, true);
    this.maxBinNumber = ((1 << ((this.depth + 1) * 3)) - 1) / 7;
    const maxRefLength = 2 ** (this.minShift + this.depth * 3);

    const auxLength = dataView.getInt32(12, true);
    const aux =
      auxLength && auxLength >= 30
        ? this.parseAuxData(bytes, 16)
        : {
            refIdToName: [],
            refNameToId: {},
            metaChar: null,
            columnNumbers: { ref: 0, start: 1, end: 2 },
            coordinateType: "zero-based-half-open",
            format: "generic",
          };

    const refCount = dataView.getInt32(16 + auxLength, true);
    let firstDataLine;
    let cursor = 16 + auxLength + 4;
    const indices = new Array(refCount).fill(0).map(() => {
      const binCount = dataView.getInt32(cursor, true);
      cursor += 4;
      const binIndex = {};
      let stats;
      for (let b = 0; b < binCount; b += 1) {
        const bin = dataView.getUint32(cursor, true);
        if (bin > this.maxBinNumber) {
          // A bin number beyond the binning scheme is the stats pseudo-bin,
          // which has a fixed size of 48 bytes.
          stats = this.parsePseudoBin(bytes, cursor + 4);
          cursor += 48;
        } else {
          const loffset = S(bytes, cursor + 4);
          firstDataLine = this._findFirstData(firstDataLine, loffset);
          const chunkCount = dataView.getInt32(cursor + 12, true);
          cursor += 16;
          const chunks = new Array(chunkCount);
          for (let k = 0; k < chunkCount; k += 1) {
            const chunkStart = S(bytes, cursor);
            const chunkEnd = S(bytes, cursor + 8);
            cursor += 16;
            chunks[k] = new D(chunkStart, chunkEnd, bin);
          }
          binIndex[bin] = chunks;
        }
      }
      return { binIndex, stats };
    });

    return {
      ...aux,
      csi: true,
      refCount,
      maxBlockSize: 65536,
      firstDataLine,
      csiVersion,
      indices,
      depth: this.depth,
      maxBinNumber: this.maxBinNumber,
      maxRefLength,
    };
  }

  /** Reads the line count stored 28 bytes past `offset` in the pseudo-bin. */
  parsePseudoBin(bytes, offset) {
    return {
      lineCount: Q(bytes, offset + 28),
    };
  }

  /**
   * Lists the merged chunks that may hold records for the region.
   *
   * @param refName reference sequence name (after renaming)
   * @param min region start; negative values are treated as 0
   * @param max region end
   * @returns array of chunks, empty when the reference is not in the index
   */
  async blocksForRange(refName, min, max, opts = {}) {
    const start = min < 0 ? 0 : min;
    const indexData = await this.parse(opts);
    const refId = indexData.refNameToId[refName];
    if (refId === undefined) {
      return [];
    }
    const refIndex = indexData.indices[refId];
    if (!refIndex) {
      return [];
    }

    const chunks = [];
    for (const [firstBin, lastBin] of this.reg2bins(start, max)) {
      for (let bin = firstBin; bin <= lastBin; bin++) {
        const binChunks = refIndex.binIndex[bin];
        if (binChunks) {
          for (const chunk of binChunks) {
            // Copy, because X() mutates the chunks it merges.
            chunks.push(new D(chunk.minv, chunk.maxv, bin));
          }
        }
      }
    }
    return X(chunks, new z(0, 0));
  }

  /**
   * calculate the list of bins that may overlap with region [beg,end) (zero-based half-open)
   *
   * @returns array of `[firstBin, lastBin]` pairs, one per binning level
   * @throws {Error} when the region spans more bins than the scheme has
   */
  reg2bins(beg, end) {
    let first = beg - 1;
    if (first < 1) {
      first = 1;
    }
    let last = end > 2 ** 50 ? 2 ** 34 : end;
    last -= 1;

    let level = 0;
    let binOffset = 0;
    let shift = this.minShift + this.depth * 3;
    const bins = [];
    for (; level <= this.depth; shift -= 3, binOffset += oe(1, level * 3), level += 1) {
      const firstBin = binOffset + H(first, shift);
      const lastBin = binOffset + H(last, shift);
      if (lastBin - firstBin + bins.length > this.maxBinNumber) {
        throw new Error(`query ${first}-${last} is too large for current binning scheme (shift ${this.minShift}, depth ${this.depth}), try a smaller query or a coarser index binning scheme`);
      }
      bins.push([firstBin, lastBin]);
    }
    return bins;
  }
}
// Little-endian uint32 reading of the four magic bytes "TBI\x01"; the TBI
// parser compares the first four index bytes against it.
const ae = 0x01494254;
// Right-shift applied to a coordinate to get its slot in a reference's
// linear index.
const j = 14;
/**
 * Lists, for each of the six fixed binning levels, the `[first, last]` bin
 * numbers that a region can fall into.
 *
 * @param beg region start
 * @param end region end
 * @returns array of six `[firstBin, lastBin]` pairs, coarsest level first
 */
function ce(beg, end) {
  const first = beg + 1;
  const last = end - 1;
  // [first bin number of the level, right-shift for the level]
  const levels = [
    [1, 26],
    [9, 23],
    [73, 20],
    [585, 17],
    [4681, 14],
  ];
  const ranges = [[0, 0]];
  for (const [levelOffset, shift] of levels) {
    ranges.push([levelOffset + (first >> shift), levelOffset + (last >> shift)]);
  }
  return ranges;
}
/**
 * Reader for TBI-format (tabix) indexes. Parses the whole index into memory
 * and answers "which chunks may contain records for this region" queries.
 */
class y extends G {
  /**
   * @param refName reference sequence name (after renaming)
   * @returns the line count stored for that reference, or -1 when the
   * reference is unknown or has no stats
   */
  async lineCount(refName, opts = {}) {
    const indexData = await this.parse(opts);
    const refId = indexData.refNameToId[refName];
    if (refId === undefined || !indexData.indices[refId]) {
      return -1;
    }
    return indexData.indices[refId].stats?.lineCount ?? -1;
  }

  // fetch and parse the index
  async _parse(opts = {}) {
    const compressed = await this.filehandle.readFile(opts);
    const bytes = await V(compressed);
    N(opts.signal);
    // Honour byteOffset/byteLength so that `bytes` may be a view into a
    // larger ArrayBuffer; slice() and S() index `bytes` directly and so
    // already respect the view.
    const dataView = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);

    if (dataView.getUint32(0, true) !== ae) {
      throw new Error("Not a TBI file");
    }

    const refCount = dataView.getUint32(4, true);
    const formatFlags = dataView.getUint32(8, true);
    const coordinateType = formatFlags & 65536 ? "zero-based-half-open" : "1-based-closed";
    const format = {
      0: "generic",
      1: "SAM",
      2: "VCF",
    }[formatFlags & 15];
    if (!format) {
      throw new Error(`invalid Tabix preset format flags ${formatFlags}`);
    }
    const columnNumbers = {
      ref: dataView.getInt32(12, true),
      start: dataView.getInt32(16, true),
      end: dataView.getInt32(20, true),
    };
    const metaValue = dataView.getInt32(24, true);
    const depth = 5;
    const maxBinNumber = ((1 << ((depth + 1) * 3)) - 1) / 7;
    const maxRefLength = 2 ** (14 + depth * 3);
    const metaChar = metaValue ? String.fromCharCode(metaValue) : null;
    const skipLines = dataView.getInt32(28, true);
    const nameSectionLength = dataView.getInt32(32, true);
    const { refNameToId, refIdToName } = this._parseNameBytes(bytes.slice(36, 36 + nameSectionLength));

    let cursor = 36 + nameSectionLength;
    let firstDataLine;
    const indices = new Array(refCount).fill(0).map(() => {
      const binCount = dataView.getInt32(cursor, true);
      cursor += 4;
      const binIndex = {};
      let stats;
      for (let b = 0; b < binCount; b += 1) {
        const bin = dataView.getUint32(cursor, true);
        cursor += 4;
        if (bin > maxBinNumber + 1) {
          throw new Error("tabix index contains too many bins, please use a CSI index");
        }
        const chunkCount = dataView.getInt32(cursor, true);
        cursor += 4;
        if (bin === maxBinNumber + 1) {
          // The bin just past the scheme is the stats pseudo-bin; it is only
          // decoded when it has exactly two chunk slots.
          if (chunkCount === 2) {
            stats = this.parsePseudoBin(bytes, cursor);
          }
          cursor += 16 * chunkCount;
        } else {
          const chunks = new Array(chunkCount);
          for (let k = 0; k < chunkCount; k += 1) {
            const chunkStart = S(bytes, cursor);
            const chunkEnd = S(bytes, cursor + 8);
            cursor += 16;
            firstDataLine = this._findFirstData(firstDataLine, chunkStart);
            chunks[k] = new D(chunkStart, chunkEnd, bin);
          }
          binIndex[bin] = chunks;
        }
      }

      const linearCount = dataView.getInt32(cursor, true);
      cursor += 4;
      const linearIndex = new Array(linearCount);
      for (let i = 0; i < linearCount; i += 1) {
        linearIndex[i] = S(bytes, cursor);
        cursor += 8;
        firstDataLine = this._findFirstData(firstDataLine, linearIndex[i]);
      }
      return {
        binIndex,
        linearIndex,
        stats,
      };
    });

    return {
      indices,
      metaChar,
      maxBinNumber,
      maxRefLength,
      skipLines,
      firstDataLine,
      columnNumbers,
      coordinateType,
      format,
      refIdToName,
      refNameToId,
      maxBlockSize: 65536,
    };
  }

  /** Reads the line count stored 16 bytes past `offset` in the pseudo-bin. */
  parsePseudoBin(bytes, offset) {
    return {
      lineCount: Q(bytes, offset + 16),
    };
  }

  /**
   * Splits a block of NUL-terminated names into id->name and name->id
   * lookups, passing every name through `renameRefSeq`. Empty names still
   * consume an id.
   */
  _parseNameBytes(nameBytes) {
    let currRefId = 0;
    let currNameStart = 0;
    const refIdToName = [];
    const refNameToId = {};
    const decoder = new TextDecoder("utf8");
    for (let i = 0; i < nameBytes.length; i += 1) {
      if (!nameBytes[i]) {
        if (currNameStart < i) {
          const refName = this.renameRefSeq(decoder.decode(nameBytes.subarray(currNameStart, i)));
          refIdToName[currRefId] = refName;
          refNameToId[refName] = currRefId;
        }
        currNameStart = i + 1;
        currRefId += 1;
      }
    }
    return {
      refNameToId,
      refIdToName,
    };
  }

  /**
   * Lists the merged chunks that may hold records for the region.
   *
   * @param refName reference sequence name (after renaming)
   * @param min region start; negative values are treated as 0
   * @param max region end
   * @returns array of chunks, empty when the reference is not in the index
   */
  async blocksForRange(refName, min, max, opts = {}) {
    const start = min < 0 ? 0 : min;
    const indexData = await this.parse(opts);
    const refId = indexData.refNameToId[refName];
    if (refId === undefined) {
      return [];
    }
    const refIndex = indexData.indices[refId];
    if (!refIndex) {
      return [];
    }

    const { binIndex, linearIndex } = refIndex;
    const linearCount = linearIndex.length;

    // Warn when the (clamped) linear-index slot for the start is empty.
    if (linearCount) {
      const startSlot = start >> j;
      const clampedSlot = startSlot >= linearCount ? linearCount - 1 : startSlot;
      if (!linearIndex[clampedSlot]) {
        console.warn("querying outside of possible tabix range");
      }
    }

    const chunks = [];
    for (const [firstBin, lastBin] of ce(start, max)) {
      for (let bin = firstBin; bin <= lastBin; bin++) {
        if (binIndex[bin]) {
          for (const chunk of binIndex[bin]) {
            // Copy, because X() mutates the chunks it merges.
            chunks.push(new D(chunk.minv, chunk.maxv, bin));
          }
        }
      }
    }

    // Lowest virtual offset recorded in the linear index for the region;
    // chunks ending at or before it are discarded by X().
    let lowest = null;
    const minSlot = Math.min(start >> j, linearCount - 1);
    const maxSlot = Math.min(max >> j, linearCount - 1);
    for (let slot = minSlot; slot <= maxSlot; ++slot) {
      const offset = linearIndex[slot];
      if (offset && (!lowest || offset.compareTo(lowest) < 0)) {
        lowest = offset;
      }
    }
    return X(chunks, lowest);
  }
}
/**
 * Reports whether every UTF-16 code unit of the string is in the ASCII range
 * (U+0000 to U+007F). An empty string counts as ASCII.
 */
function le(text) {
  const asciiOnly = /^[\u0000-\u007F]*$/;
  return asciiOnly.test(text);
}
/**
 * A bgzip-compressed, tabix- or CSI-indexed text file that can be queried by
 * reference sequence and coordinate range.
 */
class me {
  /**
   * @param {object} args
   *
   * @param {string} [args.path]
   *
   * @param {filehandle} [args.filehandle]
   *
   * @param {string} [args.tbiPath]
   *
   * @param {filehandle} [args.tbiFilehandle]
   *
   * @param {string} [args.csiPath]
   *
   * @param {filehandle} [args.csiFilehandle]
   *
   * @param {url} [args.url]
   *
   * @param {csiUrl} [args.csiUrl]
   *
   * @param {tbiUrl} [args.tbiUrl]
   *
   * @param {function} [args.renameRefSeqs] optional function with sig `string
   * => string` to transform reference sequence names for the purpose of
   * indexing and querying. note that the data that is returned is not altered,
   * just the names of the reference sequences that are used for querying.
   *
   * @param {number} [args.chunkCacheSize] divided by 65536 (rounded down) to
   * get the `maxSize` of the chunk cache; defaults to 5 * 2^20
   */
  constructor({
    path,
    filehandle,
    url,
    tbiPath,
    tbiUrl,
    tbiFilehandle,
    csiPath,
    csiUrl,
    csiFilehandle,
    renameRefSeqs = (n) => n,
    chunkCacheSize = 5 * 2 ** 20,
  }) {
    if (filehandle) {
      this.filehandle = filehandle;
    } else if (path) {
      this.filehandle = new B(path);
    } else if (url) {
      this.filehandle = new A(url);
    } else {
      throw new TypeError("must provide either filehandle or path");
    }

    // Every branch hands renameRefSeqs to the index: the index stores renamed
    // names, and checkLine() compares renamed names, so the two must agree.
    if (tbiFilehandle) {
      this.index = new y({ filehandle: tbiFilehandle, renameRefSeqs });
    } else if (csiFilehandle) {
      this.index = new M({ filehandle: csiFilehandle, renameRefSeqs });
    } else if (tbiPath) {
      this.index = new y({ filehandle: new B(tbiPath), renameRefSeqs });
    } else if (csiPath) {
      this.index = new M({ filehandle: new B(csiPath), renameRefSeqs });
    } else if (path) {
      this.index = new y({ filehandle: new B(`${path}.tbi`), renameRefSeqs });
    } else if (csiUrl) {
      this.index = new M({ filehandle: new A(csiUrl), renameRefSeqs });
    } else if (tbiUrl) {
      this.index = new y({ filehandle: new A(tbiUrl), renameRefSeqs });
    } else if (url) {
      this.index = new y({ filehandle: new A(`${url}.tbi`), renameRefSeqs });
    } else {
      throw new TypeError("must provide one of tbiFilehandle, tbiPath, csiFilehandle, csiPath, tbiUrl, csiUrl");
    }

    this.renameRefSeq = renameRefSeqs;
    this.chunkCache = new J({
      cache: new Z({ maxSize: Math.floor(chunkCacheSize / 65536) }),
      fill: (chunk, signal) => this.readChunk(chunk, { signal }),
    });
  }

  /**
   * @param refName name of the reference sequence
   *
   * @param start start of the region (in 0-based half-open coordinates)
   *
   * @param end end of the region (in 0-based half-open coordinates)
   *
   * @param opts callback called for each line in the region. can also pass a
   * object param containing obj.lineCallback, obj.signal, etc
   *
   * @returns promise that is resolved when the whole read is finished,
   * rejected on error
   */
  async getLines(refName, start, end, opts) {
    let signal;
    let options = {};
    let callback;
    if (typeof opts === "function") {
      callback = opts;
    } else {
      options = opts;
      callback = opts.lineCallback;
      signal = opts.signal;
    }

    const metadata = await this.index.getMetadata(options);
    N(signal);
    const regionStart = start ?? 0;
    const regionEnd = end ?? metadata.maxRefLength;
    if (!(regionStart <= regionEnd)) {
      throw new TypeError("invalid start and end coordinates. start must be less than or equal to end");
    }
    if (regionStart === regionEnd) {
      return;
    }

    const chunks = await this.index.blocksForRange(refName, regionStart, regionEnd, options);
    N(signal);
    const decoder = new TextDecoder("utf8");

    for (const chunk of chunks) {
      const { buffer, cpositions, dpositions } = await this.chunkCache.get(chunk.toString(), chunk, signal);
      N(signal);

      let blockStart = 0;
      let pos = 0;
      const text = decoder.decode(buffer);
      const isAscii = le(text);
      // On the ASCII path blockStart indexes the decoded string. Otherwise it
      // indexes the raw bytes, so the loop must run to the end of the buffer:
      // the decoded string is shorter than the buffer whenever multi-byte
      // characters are present.
      const limit = isAscii ? text.length : buffer.length;

      while (blockStart < limit) {
        let line;
        let lineEnd;
        if (isAscii) {
          lineEnd = text.indexOf("\n", blockStart);
          if (lineEnd === -1) {
            break;
          }
          line = text.slice(blockStart, lineEnd);
        } else {
          lineEnd = buffer.indexOf(10, blockStart);
          if (lineEnd === -1) {
            break;
          }
          line = decoder.decode(buffer.slice(blockStart, lineEnd));
        }

        if (dpositions) {
          // Advance past every dposition at or below the current offset,
          // then step back once for the extra increment.
          while (blockStart + chunk.minv.dataPosition >= dpositions[pos++]) {}
          pos--;
        }

        const { startCoordinate, overlaps } = this.checkLine(metadata, refName, regionStart, regionEnd, line);
        if (overlaps) {
          callback(
            line,
            // cpositions[pos] refers to actual file offset of a bgzip block
            // boundaries
            //
            // we multiply by (1 <<8) in order to make sure each block has a
            // "unique" address space so that data in that block could never
            // overlap
            //
            // then the blockStart-dpositions is an uncompressed file offset
            // from that bgzip block boundary, and since the cpositions are
            // multiplied by (1 << 8) these uncompressed offsets get a unique
            // space
            cpositions[pos] * 256 + (blockStart - dpositions[pos]) + chunk.minv.dataPosition + 1,
          );
        } else if (startCoordinate !== undefined && startCoordinate >= regionEnd) {
          // A line starting past the region ends the whole read.
          return;
        }
        blockStart = lineEnd + 1;
      }
    }
  }

  async getMetadata(opts = {}) {
    return this.index.getMetadata(opts);
  }

  /**
   * get a buffer containing the "header" region of the file, which are the
   * bytes up to the first non-meta line
   */
  async getHeaderBuffer(opts = {}) {
    const { firstDataLine, metaChar, maxBlockSize } = await this.getMetadata(opts);
    N(opts.signal);

    const maxFetch = (firstDataLine?.blockPosition || 0) + maxBlockSize;
    const compressed = await this.filehandle.read(maxFetch, 0, opts);
    const bytes = await V(compressed);

    if (metaChar) {
      // Trim to the end of the last line that starts with the meta character.
      let lastNewline = -1;
      const newlineByte = 10;
      const metaByte = metaChar.charCodeAt(0);
      for (let i = 0; i < bytes.length; i += 1) {
        if (i === lastNewline + 1 && bytes[i] !== metaByte) {
          break;
        }
        if (bytes[i] === newlineByte) {
          lastNewline = i;
        }
      }
      return bytes.subarray(0, lastNewline + 1);
    }
    return bytes;
  }

  /**
   * get a string containing the "header" region of the file, is the portion up
   * to the first non-meta line
   *
   * @returns {Promise} for a string
   */
  async getHeader(opts = {}) {
    const decoder = new TextDecoder("utf8");
    const bytes = await this.getHeaderBuffer(opts);
    return decoder.decode(bytes);
  }

  /**
   * get the array of reference sequence names stored by the index parser as
   * `refIdToName`. the TBI and CSI parsers in this file pass each name
   * through `renameRefSeqs` before storing it, so these are renamed names.
   */
  async getReferenceSequenceNames(opts = {}) {
    const metadata = await this.getMetadata(opts);
    return metadata.refIdToName;
  }

  /**
   * @param {object} metadata metadata object from the parsed index, containing
   * columnNumbers, metaChar, and format
   *
   * @param {string} regionRefName
   *
   * @param {number} regionStart region start coordinate (0-based-half-open)
   *
   * @param {number} regionEnd region end coordinate (0-based-half-open)
   *
   * @param {string} line tab-separated line of text
   *
   * @returns {object} like `{startCoordinate, overlaps}`. overlaps is boolean,
   * true if line is a data line that overlaps the given region
   */
  checkLine(metadata, regionRefName, regionStart, regionEnd, line) {
    const { columnNumbers, metaChar, coordinateType, format } = metadata;
    if (metaChar && line.startsWith(metaChar)) {
      return { overlaps: false };
    }

    let { ref, start, end } = columnNumbers;
    if (!ref) {
      ref = 0;
    }
    if (!start) {
      start = 0;
    }
    if (!end) {
      end = 0;
    }
    if (format === "VCF") {
      // For VCF the end is derived from column 8 (see _getVcfEnd).
      end = 8;
    }
    const maxColumn = Math.max(ref, start, end);

    // Walk the line field by field without allocating a split array.
    let currentColumn = 1;
    let fieldStart = 0;
    let refAllele = "";
    let startCoordinate = -Infinity;
    const lineLength = line.length;
    for (let i = 0; i < lineLength + 1; i++) {
      if (line[i] === "\t" || i === lineLength) {
        if (currentColumn === ref) {
          if (this.renameRefSeq(line.slice(fieldStart, i)) !== regionRefName) {
            return { overlaps: false };
          }
        } else if (currentColumn === start) {
          startCoordinate = parseInt(line.slice(fieldStart, i), 10);
          if (coordinateType === "1-based-closed") {
            startCoordinate -= 1;
          }
          if (startCoordinate >= regionEnd) {
            return { startCoordinate, overlaps: false };
          }
          // With no separate end column the feature is one position long.
          if ((end === 0 || end === start) && startCoordinate + 1 <= regionStart) {
            return { startCoordinate, overlaps: false };
          }
        } else if (format === "VCF" && currentColumn === 4) {
          refAllele = line.slice(fieldStart, i);
        } else if (currentColumn === end) {
          const endCoordinate =
            format === "VCF"
              ? this._getVcfEnd(startCoordinate, refAllele, line.slice(fieldStart, i))
              : Number.parseInt(line.slice(fieldStart, i), 10);
          if (endCoordinate <= regionStart) {
            return { overlaps: false };
          }
        }
        fieldStart = i + 1;
        currentColumn += 1;
        if (currentColumn > maxColumn) {
          break;
        }
      }
    }
    return { startCoordinate, overlaps: true };
  }

  /**
   * Computes the end coordinate of a VCF record.
   *
   * @param {number} startCoordinate start of the record
   * @param {string} refSeq contents of the record's fourth column
   * @param {string} info contents of the record's eighth column
   * @returns {number} `startCoordinate + 1` when info contains "SVTYPE=TRA";
   * otherwise the value of an `END=` entry when info has one (and does not
   * start with "."); otherwise `startCoordinate + refSeq.length`
   */
  _getVcfEnd(startCoordinate, refSeq, info) {
    let endCoordinate = startCoordinate + refSeq.length;
    const isTRA = info.includes("SVTYPE=TRA");
    if (info[0] !== "." && !isTRA) {
      // Only match "END=" at the start of an entry, i.e. right after ";".
      let prevChar = ";";
      for (let i = 0; i < info.length; i += 1) {
        if (prevChar === ";" && info.slice(i, i + 4) === "END=") {
          let valueEnd = info.indexOf(";", i);
          if (valueEnd === -1) {
            valueEnd = info.length;
          }
          endCoordinate = parseInt(info.slice(i + 4, valueEnd), 10);
          break;
        }
        prevChar = info[i];
      }
    } else if (isTRA) {
      return startCoordinate + 1;
    }
    return endCoordinate;
  }

  /**
   * return the approximate number of data lines in the given reference
   * sequence
   *
   * @param refSeq reference sequence name
   *
   * @returns number of data lines present on that reference sequence
   */
  async lineCount(refSeq, opts = {}) {
    return this.index.lineCount(refSeq, opts);
  }

  /**
   * read and uncompress the data in a chunk (composed of one or more
   * contiguous bgzip blocks) of the file
   */
  async readChunk(chunk, opts = {}) {
    const compressed = await this.filehandle.read(chunk.fetchedSize(), chunk.minv.blockPosition, opts);
    return K(compressed, chunk);
  }
}
export {
me as T
};
//# sourceMappingURL=tabixIndexedFile-WXUhSlHL.js.map