taxonium-component
Version:
React component for exploring large phylogenetic trees in the browser
627 lines (626 loc) • 17.3 kB
JavaScript
import { bA as O } from "./JBrowsePanel-uJIA-L6s.js";
/**
 * Extract only the raw GT (genotype) string of every sample on a VCF data
 * line, without parsing any of the other FORMAT sub-fields.
 *
 * @param {string} format - The FORMAT column: colon-separated keys, e.g.
 *   "GT:DP".
 * @param {string} sampleText - The sample columns that follow FORMAT,
 *   TAB-delimited as required by the VCF specification.
 * @param {Iterable<string>} sampleNames - Sample names in column order.
 *
 * @returns {Object<string, (string|undefined)>} Map of sample name to its raw
 *   GT string. Empty when FORMAT has no key that is exactly "GT". A sample
 *   whose column is missing from the line maps to undefined.
 */
function P(format, sampleText, sampleNames) {
  const genotypes = {};
  const formatKeys = format.split(":");
  // Compare whole keys: a substring test would also accept keys such as
  // "PGT" and then report the wrong sub-field as the genotype.
  const gtIndex = formatKeys.indexOf("GT");
  if (gtIndex === -1) {
    return genotypes;
  }
  const columns = sampleText.split("\t");
  let columnIndex = 0;
  for (const sampleName of sampleNames) {
    const column = columns[columnIndex];
    columnIndex += 1;
    if (column === undefined || formatKeys.length === 1) {
      // GT is the only key, so the whole column is the genotype (or the
      // column is absent and there is nothing to extract).
      genotypes[sampleName] = column;
    } else if (gtIndex === 0) {
      // Common case: GT comes first, so cut at the first colon instead of
      // splitting the whole column.
      const colon = column.indexOf(":");
      genotypes[sampleName] = colon === -1 ? column : column.slice(0, colon);
    } else {
      genotypes[sampleName] = column.split(":")[gtIndex];
    }
  }
  return genotypes;
}
/**
 * Split a string on commas, ignoring commas that are inside a double-quoted
 * section or inside square brackets.
 *
 * Inside a quoted section a backslash escapes the following character, so an
 * escaped quote (\") does not end the section, and square brackets inside a
 * quoted section are treated as ordinary text.
 *
 * @param {string} text - The text to split.
 *
 * @returns {string[]} The trimmed pieces, with quotes, brackets and
 *   backslashes left in place. An empty trailing piece is dropped.
 */
function C(text) {
  const pieces = [];
  let current = "";
  let inQuotes = false;
  let inBrackets = false;
  let escaped = false;
  for (const ch of text) {
    if (escaped) {
      // Character following a backslash inside quotes: always literal.
      current += ch;
      escaped = false;
    } else if (inQuotes) {
      if (ch === "\\") {
        escaped = true;
      } else if (ch === '"') {
        inQuotes = false;
      }
      current += ch;
    } else if (ch === "," && !inBrackets) {
      pieces.push(current.trim());
      current = "";
    } else {
      if (ch === '"') {
        inQuotes = true;
      } else if (ch === "[") {
        inBrackets = true;
      } else if (ch === "]") {
        inBrackets = false;
      }
      current += ch;
    }
  }
  if (current) {
    pieces.push(current.trim());
  }
  return pieces;
}
/**
 * Split a string in two at the first occurrence of a separator.
 *
 * @param {string} text - The string to split.
 * @param {string} separator - The separator to look for.
 *
 * @returns {Array} A two-element array: the text before the separator and the
 *   text after it. When the separator does not occur, the first element is
 *   the whole input and the second is undefined.
 */
function L(text, separator) {
  const at = text.indexOf(separator);
  if (at === -1) {
    // Without this guard slice(0, -1) would silently drop the last character.
    return [text, undefined];
  }
  return [text.slice(0, at), text.slice(at + separator.length)];
}
/**
 * Parse the body of a structured metadata value such as
 * `<ID=DP,Number=1,Type=Integer,Description="Total depth">` into an object.
 *
 * A leading "<" and a trailing ">" are removed, the remainder is split into
 * `key=value` pairs with C, and each pair is split at its first "=" with L.
 * Values wrapped in square brackets become arrays of trimmed strings, values
 * wrapped in double quotes lose the quotes, and any other value only has a
 * leading and/or trailing double quote stripped.
 *
 * @param {string} metaVal - The structured value, usually wrapped in "<...>".
 *
 * @returns {Object} Object holding one property per `key=value` pair.
 */
function w(metaVal) {
  const body = metaVal.replace(/^<|>$/g, "");
  const entries = [];
  for (const pair of C(body)) {
    const [key, raw] = L(pair, "=");
    let value;
    if (raw && raw.startsWith("[") && raw.endsWith("]")) {
      value = raw
        .slice(1, -1)
        .split(",")
        .map((item) => item.trim());
    } else if (raw && raw.startsWith('"') && raw.endsWith('"')) {
      value = raw.slice(1, -1);
    } else if (raw == null) {
      value = undefined;
    } else {
      value = raw.replaceAll(/^"|"$/g, "");
    }
    entries.push([key, value]);
  }
  return Object.fromEntries(entries);
}
/**
 * Definitions of the keys reserved by the VCF specification
 * (https://samtools.github.io/hts-specs/VCFv4.3.pdf), grouped by the header
 * line type they belong to. The parser class copies this table into its
 * initial metadata, so these definitions apply to any key that the header of
 * a file does not declare itself.
 *
 * `Type` decides how the parser converts values: "Integer" and "Float" values
 * are converted to numbers, "Flag" entries become `true`, anything else stays
 * a string.
 */
const b = {
  // INFO fields
  InfoFields: {
    AA: { Number: 1, Type: "String", Description: "Ancestral allele" },
    AC: {
      Number: "A",
      Type: "Integer",
      Description: "Allele count in genotypes, for each ALT allele, in the same order as listed"
    },
    AD: {
      Number: "R",
      Type: "Integer",
      Description: "Total read depth for each allele"
    },
    ADF: {
      Number: "R",
      Type: "Integer",
      Description: "Read depth for each allele on the forward strand"
    },
    ADR: {
      Number: "R",
      Type: "Integer",
      Description: "Read depth for each allele on the reverse strand"
    },
    AF: {
      Number: "A",
      Type: "Float",
      Description: "Allele frequency for each ALT allele in the same order as listed (estimated from primary data, not called genotypes)"
    },
    AN: {
      Number: 1,
      Type: "Integer",
      Description: "Total number of alleles in called genotypes"
    },
    BQ: {
      Number: 1,
      Type: "Float",
      Description: "RMS base quality"
    },
    // A CIGAR is text (e.g. "3M1D"), one per ALT allele; declaring it as a
    // numeric type would make the parser turn every value into NaN.
    CIGAR: {
      Number: "A",
      Type: "String",
      Description: "Cigar string describing how to align an alternate allele to the reference allele"
    },
    DB: {
      Number: 0,
      Type: "Flag",
      Description: "dbSNP membership"
    },
    DP: {
      Number: 1,
      Type: "Integer",
      Description: "combined depth across samples"
    },
    // For precise variants, END is POS + length of REF allele - 1,
    // and for imprecise variants the corresponding best estimate.
    END: {
      Number: 1,
      Type: "Integer",
      Description: "End position (for use with symbolic alleles)"
    },
    H2: {
      Number: 0,
      Type: "Flag",
      Description: "HapMap2 membership"
    },
    H3: {
      Number: 0,
      Type: "Flag",
      Description: "HapMap3 membership"
    },
    MQ: {
      Number: 1,
      Type: "Float",
      Description: "RMS mapping quality"
    },
    MQ0: {
      Number: 1,
      Type: "Integer",
      Description: "Number of MAPQ == 0 reads"
    },
    NS: {
      Number: 1,
      Type: "Integer",
      Description: "Number of samples with data"
    },
    SB: {
      Number: 4,
      Type: "Integer",
      Description: "Strand bias"
    },
    SOMATIC: {
      Number: 0,
      Type: "Flag",
      Description: "Somatic mutation (for cancer genomics)"
    },
    VALIDATED: {
      Number: 0,
      Type: "Flag",
      Description: "Validated by follow-up experiment"
    },
    "1000G": {
      Number: 0,
      Type: "Flag",
      Description: "1000 Genomes membership"
    },
    // specifically for structural variants
    IMPRECISE: {
      Number: 0,
      Type: "Flag",
      Description: "Imprecise structural variation"
    },
    NOVEL: {
      Number: 0,
      Type: "Flag",
      Description: "Indicates a novel structural variation"
    },
    // Value should be one of DEL, INS, DUP, INV, CNV, BND. This key can
    // be derived from the REF/ALT fields but is useful for filtering.
    SVTYPE: {
      Number: 1,
      Type: "String",
      Description: "Type of structural variant"
    },
    // One value for each ALT allele. Longer ALT alleles (e.g. insertions)
    // have positive values, shorter ALT alleles (e.g. deletions)
    // have negative values.
    SVLEN: {
      Number: null,
      Type: "Integer",
      Description: "Difference in length between REF and ALT alleles"
    },
    CIPOS: {
      Number: 2,
      Type: "Integer",
      Description: "Confidence interval around POS for imprecise variants"
    },
    CIEND: {
      Number: 2,
      Type: "Integer",
      Description: "Confidence interval around END for imprecise variants"
    },
    HOMLEN: {
      Type: "Integer",
      Description: "Length of base pair identical micro-homology at event breakpoints"
    },
    HOMSEQ: {
      Type: "String",
      Description: "Sequence of base pair identical micro-homology at event breakpoints"
    },
    // For precise variants, the consensus sequence the alternate allele assembly
    // is derivable from the REF and ALT fields. However, the alternate allele
    // assembly file may contain additional information about the characteristics
    // of the alt allele contigs.
    BKPTID: {
      Type: "String",
      Description: "ID of the assembled alternate allele in the assembly file"
    },
    MEINFO: {
      Number: 4,
      Type: "String",
      Description: "Mobile element info of the form NAME,START,END,POLARITY"
    },
    METRANS: {
      Number: 4,
      Type: "String",
      Description: "Mobile element transduction info of the form CHR,START,END,POLARITY"
    },
    DGVID: {
      Number: 1,
      Type: "String",
      Description: "ID of this element in Database of Genomic Variation"
    },
    DBVARID: {
      Number: 1,
      Type: "String",
      Description: "ID of this element in DBVAR"
    },
    DBRIPID: {
      Number: 1,
      Type: "String",
      Description: "ID of this element in DBRIP"
    },
    MATEID: {
      Number: null,
      Type: "String",
      Description: "ID of mate breakends"
    },
    PARID: {
      Number: 1,
      Type: "String",
      Description: "ID of partner breakend"
    },
    EVENT: {
      Number: 1,
      Type: "String",
      Description: "ID of event associated to breakend"
    },
    CILEN: {
      Number: 2,
      Type: "Integer",
      Description: "Confidence interval around the inserted material between breakend"
    },
    DPADJ: { Type: "Integer", Description: "Read Depth of adjacency" },
    CN: {
      Number: 1,
      Type: "Integer",
      Description: "Copy number of segment containing breakend"
    },
    CNADJ: {
      Number: null,
      Type: "Integer",
      Description: "Copy number of adjacency"
    },
    CICN: {
      Number: 2,
      Type: "Integer",
      Description: "Confidence interval around copy number for the segment"
    },
    CICNADJ: {
      Number: null,
      Type: "Integer",
      Description: "Confidence interval around copy number for the adjacency"
    }
  },
  // FORMAT fields
  GenotypeFields: {
    AD: {
      Number: "R",
      Type: "Integer",
      Description: "Read depth for each allele"
    },
    ADF: {
      Number: "R",
      Type: "Integer",
      Description: "Read depth for each allele on the forward strand"
    },
    ADR: {
      Number: "R",
      Type: "Integer",
      Description: "Read depth for each allele on the reverse strand"
    },
    DP: {
      Number: 1,
      Type: "Integer",
      Description: "Read depth"
    },
    EC: {
      Number: "A",
      Type: "Integer",
      Description: "Expected alternate allele counts"
    },
    FT: {
      Number: 1,
      Type: "String",
      Description: 'Filter indicating if this genotype was "called"'
    },
    GL: {
      Number: "G",
      Type: "Float",
      Description: "Genotype likelihoods"
    },
    GP: {
      Number: "G",
      Type: "Float",
      Description: "Genotype posterior probabilities"
    },
    GQ: {
      Number: 1,
      Type: "Integer",
      Description: "Conditional genotype quality"
    },
    GT: {
      Number: 1,
      Type: "String",
      Description: "Genotype"
    },
    HQ: {
      Number: 2,
      Type: "Integer",
      Description: "Haplotype quality"
    },
    MQ: {
      Number: 1,
      Type: "Integer",
      Description: "RMS mapping quality"
    },
    PL: {
      Number: "G",
      Type: "Integer",
      Description: "Phred-scaled genotype likelihoods rounded to the closest integer"
    },
    PQ: {
      Number: 1,
      Type: "Integer",
      Description: "Phasing quality"
    },
    PS: {
      Number: 1,
      Type: "Integer",
      Description: "Phase set"
    }
  },
  // ALT fields
  AltTypes: {
    DEL: {
      Description: "Deletion relative to the reference"
    },
    INS: {
      Description: "Insertion of novel sequence relative to the reference"
    },
    DUP: {
      Description: "Region of elevated copy number relative to the reference"
    },
    INV: {
      Description: "Inversion of reference sequence"
    },
    CNV: {
      Description: "Copy number variable region (may be both deletion and duplication)"
    },
    "DUP:TANDEM": {
      Description: "Tandem duplication"
    },
    "DEL:ME": {
      Description: "Deletion of mobile element relative to the reference"
    },
    "INS:ME": {
      Description: "Insertion of a mobile element relative to the reference"
    },
    NON_REF: {
      Description: "Represents any possible alternative allele at this location"
    },
    "*": {
      Description: "Represents any possible alternative allele at this location"
    }
  },
  // FILTER fields
  FilterTypes: {
    PASS: {
      Description: "Passed all filters"
    }
  }
};
/**
 * Percent-decode a string, falling back to the input itself when it is not a
 * valid percent-encoded sequence.
 *
 * @param {string} encoded - Possibly percent-encoded text.
 *
 * @returns {string} The decoded text, or `encoded` unchanged when
 *   decodeURIComponent throws.
 */
function G(encoded) {
  let decoded = encoded;
  try {
    decoded = decodeURIComponent(encoded);
  } catch {
    // Malformed escape sequence: keep the raw text.
  }
  return decoded;
}
/**
 * Parser for the text of a VCF (Variant Call Format) file.
 *
 * An instance is built from the header text and can then turn individual data
 * lines into plain objects with `parseLine`. Header lines and data lines are
 * TAB-delimited, as required by the VCF specification.
 */
class Q {
  /**
   * @param {object} [args]
   * @param {string} [args.header] - The complete header: every "##" metadata
   *   line plus the "#CHROM ..." column line, separated by LF or CRLF.
   * @param {boolean} [args.strict=true] - When true, `parseLine` rejects
   *   lines that have no INFO column.
   *
   * @throws {Error} If the header is empty, contains a line that does not
   *   start with "#", has no column line, or its column line does not start
   *   with the eight fixed VCF columns.
   */
  constructor({ header = "", strict = true } = {}) {
    if (!header.length) {
      throw new Error("empty header received");
    }
    const headerLines = header.split(/[\r\n]+/).filter(Boolean);
    if (!headerLines.length) {
      throw new Error("no non-empty header lines specified");
    }
    this.strict = strict;
    // Deep copy, so header lines of this file never modify the shared table
    // of reserved keys.
    this.metadata = JSON.parse(
      JSON.stringify({
        INFO: b.InfoFields,
        FORMAT: b.GenotypeFields,
        ALT: b.AltTypes,
        FILTER: b.FilterTypes
      })
    );
    let columnLine;
    for (const headerLine of headerLines) {
      if (!headerLine.startsWith("#")) {
        throw new Error(`Bad line in header:\n${headerLine}`);
      }
      if (headerLine.startsWith("##")) {
        this.parseMetadata(headerLine);
      } else {
        columnLine = headerLine;
      }
    }
    if (!columnLine) {
      throw new Error("No format line found in header");
    }
    const columns = columnLine.trim().split("\t");
    const fixedColumns = ["#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"];
    if (columns.length < fixedColumns.length) {
      throw new Error(`VCF header missing columns:\n${columnLine}`);
    }
    if (!fixedColumns.every((name, index) => columns[index] === name)) {
      throw new Error(`VCF column headers not correct:\n${columnLine}`);
    }
    // Column 9 is FORMAT; every column after it names a sample.
    this.samples = columns.slice(9);
  }

  /**
   * Parse all FORMAT sub-fields of every sample on a data line.
   *
   * @param {string} format - The FORMAT column (colon-separated keys). An
   *   empty string yields an empty result.
   * @param {string} sampleText - The TAB-delimited sample columns.
   *
   * @returns {Object} Map of sample name to an object holding one property
   *   per FORMAT key. Each value is an array of the comma-separated entries
   *   (numbers when the key is declared Integer or Float; a "." entry becomes
   *   undefined), or undefined when the whole sub-field is "." or empty. A
   *   sample whose column is missing from the line maps to an empty object.
   */
  parseSamples(format, sampleText) {
    const samples = {};
    if (!format) {
      return samples;
    }
    const columns = sampleText.split("\t");
    const keys = format.split(":");
    const isNumeric = keys.map((key) => {
      const type = this.getMetadata("FORMAT", key, "Type");
      return type === "Integer" || type === "Float";
    });
    this.samples.forEach((sampleName, columnIndex) => {
      const fields = {};
      samples[sampleName] = fields;
      const column = columns[columnIndex];
      if (column === undefined) {
        // Line has fewer sample columns than the header declares.
        return;
      }
      const subFields = column.split(":");
      // Sub-fields beyond the declared FORMAT keys have no name; skip them.
      const count = Math.min(subFields.length, keys.length);
      for (let i = 0; i < count; i += 1) {
        const raw = subFields[i];
        fields[keys[i]] =
          raw === "" || raw === "."
            ? undefined
            : raw.split(",").map((entry) => {
                if (entry === ".") {
                  return undefined;
                }
                return isNumeric[i] ? +entry : entry;
              });
      }
    });
    return samples;
  }

  /**
   * Parse a VCF metadata line (i.e. a line that starts with "##") and add its
   * properties to the object.
   *
   * Keys and IDs are always stored as own properties, so a line such as
   * "##__proto__=<...>" cannot modify any prototype.
   *
   * @param {string} line - A line from the VCF. Supports both LF and CRLF
   * newlines.
   *
   * @throws {Error} If the line is not of the form "##key=value".
   */
  parseMetadata(line) {
    const match = /^##(.+?)=(.*)/.exec(line.trim());
    if (!match) {
      throw new Error(`Line is not a valid metadata line: ${line}`);
    }
    const [, key, value] = match;
    // defineProperty creates an own data property even for names like
    // "__proto__", where a plain assignment would invoke the inherited setter.
    const setOwn = (target, name, propertyValue) => {
      Object.defineProperty(target, name, {
        value: propertyValue,
        writable: true,
        enumerable: true,
        configurable: true
      });
    };
    if (!value.startsWith("<")) {
      setOwn(this.metadata, key, value);
      return;
    }
    const [id, fields] = this.parseStructuredMetaVal(value);
    if (!id) {
      // Structured value without an ID: it becomes the value of the key.
      setOwn(this.metadata, key, fields);
      return;
    }
    const existing = Object.prototype.hasOwnProperty.call(this.metadata, key)
      ? this.metadata[key]
      : undefined;
    let group = existing;
    if (group === null || typeof group !== "object") {
      // Nothing stored yet, or an earlier unstructured line stored a string.
      group = {};
      setOwn(this.metadata, key, group);
    }
    setOwn(group, id, fields);
  }

  /**
   * Parse a VCF header structured meta string (i.e. a meta value that starts
   * with "<ID=...")
   *
   * @param {string} metaVal - The VCF metadata value
   *
   * @returns {Array} - Array with two entries, 1) a string of the metadata ID
   * and 2) an object with the other key-value pairs in the metadata. A
   * `Number` entry is converted to a number when it is numeric and left as a
   * string (e.g. "A", "R", "G", ".") otherwise.
   */
  parseStructuredMetaVal(metaVal) {
    const { ID: id, ...fields } = w(metaVal);
    if ("Number" in fields) {
      const numeric = Number(fields.Number);
      if (!Number.isNaN(numeric)) {
        fields.Number = numeric;
      }
    }
    return [id, fields];
  }

  /**
   * Get metadata filtered by the elements in args. For example, can pass
   * ('INFO', 'DP') to only get info on an metadata tag that was like
   * "##INFO=<ID=DP,...>"
   *
   * @param {...string} args - List of metadata filter strings.
   *
   * @returns {any} An object, string, or number, depending on the filtering.
   * The lookup stops at the first step that yields a falsy value and returns
   * that value.
   */
  getMetadata(...args) {
    let node = this.metadata;
    for (const key of args) {
      node = node[key];
      if (!node) {
        return node;
      }
    }
    return node;
  }

  /**
   * Parse a VCF line into an object like
   *
   * ```typescript
   * {
   *   CHROM: 'contigA',
   *   POS: 3000,
   *   ID: ['rs17883296'],
   *   REF: 'G',
   *   ALT: ['T', 'A'],
   *   QUAL: 100,
   *   FILTER: 'PASS',
   *   INFO: {
   *     NS: [3],
   *     DP: [14],
   *     AF: [0.5],
   *     DB: true,
   *     XYZ: ['5'],
   *   },
   *   SAMPLES: () => ({
   *     HG00096: {
   *       GT: ['0|0'],
   *       AP: ['0.000', '0.000'],
   *     }
   *   }),
   *   GENOTYPES: () => ({
   *     HG00096: '0|0'
   *   })
   * }
   * ```
   *
   * SAMPLES and GENOTYPES methods are functions instead of static data fields
   * because it avoids parsing the potentially long list of samples from e.g.
   * 1000 genotypes data unless requested.
   *
   * The SAMPLES function gives all info about the samples
   *
   * The GENOTYPES function only extracts the raw GT string if it exists, for
   * potentially optimized parsing by programs that need it
   *
   * A column holding "." (or a column missing from the line) is reported as
   * undefined, except INFO which is reported as an empty object. A line
   * terminator at the end of the line is ignored.
   *
   * @param {string} line - A string of a line from a VCF
   *
   * @throws {Error} In strict mode, if the line has no INFO column.
   */
  parseLine(line) {
    const text = line.replace(/[\r\n]+$/, "");
    // Find the 9th TAB: everything before it is the fixed columns plus
    // FORMAT, everything after it is sample data that is only parsed lazily.
    let fixedEnd = text.length;
    let searchFrom = 0;
    for (let tabsSeen = 0; tabsSeen < 9; tabsSeen += 1) {
      const tab = text.indexOf("\t", searchFrom);
      if (tab === -1) {
        fixedEnd = text.length;
        break;
      }
      fixedEnd = tab;
      searchFrom = tab + 1;
    }
    const fields = text.slice(0, fixedEnd).split("\t");
    const sampleText = text.slice(fixedEnd + 1);
    const [chrom, pos, id, ref, alt, qual, filter, info, format] = fields;
    if (this.strict && !info) {
      throw new Error("no INFO field specified, must contain at least a '.' (turn off strict mode to allow)");
    }
    const isMissing = (value) => value === undefined || value === ".";
    const splitUnlessMissing = (value, separator) =>
      isMissing(value) ? undefined : value.split(separator);

    // Only pay for percent-decoding when the column contains a "%" at all.
    const needsDecoding = info !== undefined && info.includes("%");
    const infoEntries = isMissing(info)
      ? []
      : info.split(";").map((entry) => {
          // Split at the FIRST "=" only, so a value containing "=" is kept whole.
          const eq = entry.indexOf("=");
          const key = eq === -1 ? entry : entry.slice(0, eq);
          const rawValue = eq === -1 ? undefined : entry.slice(eq + 1);
          const values = rawValue?.split(",").map((value) => {
            if (value === ".") {
              return undefined;
            }
            return value && needsDecoding ? G(value) : value;
          });
          const type = this.getMetadata("INFO", key, "Type");
          if (type === "Integer" || type === "Float") {
            return [key, values?.map((value) => (value === undefined ? undefined : Number(value)))];
          }
          if (type === "Flag") {
            return [key, true];
          }
          // Undeclared key without a value is treated like a flag.
          return [key, values ?? true];
        });

    const filters = splitUnlessMissing(filter, ";");
    return {
      CHROM: chrom,
      POS: +pos,
      ALT: splitUnlessMissing(alt, ","),
      INFO: Object.fromEntries(infoEntries),
      REF: ref,
      FILTER: filters && filters.length === 1 && filters[0] === "PASS" ? "PASS" : filters,
      ID: splitUnlessMissing(id, ";"),
      QUAL: isMissing(qual) ? undefined : +qual,
      FORMAT: format,
      SAMPLES: () => this.parseSamples(format ?? "", sampleText),
      GENOTYPES: () => P(format ?? "", sampleText, this.samples)
    };
  }
}
/**
 * Build the feature-level summary of a parsed VCF record.
 *
 * @param {object} variant - A parsed VCF record; its CHROM, POS, ID, REF and
 *   ALT properties are read here (REF defaults to "" when undefined).
 * @param {*} parser - Passed through unchanged as the third argument of the
 *   imported helper O.
 *
 * @returns {{refName: *, start: number, end: *, description: *, type: *, name: (string|undefined)}}
 *   `start` is POS - 1, `end` is whatever k returns for the record, `type`
 *   and `description` are the first and second element returned by O, and
 *   `name` is the ID entries joined with "," (undefined when there is no ID).
 */
function V(variant, parser) {
  const ref = variant.REF === undefined ? "" : variant.REF;
  const alt = variant.ALT;
  const pos = variant.POS;
  const chrom = variant.CHROM;
  const ids = variant.ID;
  const typeAndDescription = O(ref, alt, parser);
  const [type, description] = typeAndDescription;
  const feature = {
    refName: chrom,
    start: pos - 1,
    end: k(variant),
    description,
    type,
    name: ids?.join(",")
  };
  return feature;
}
/**
 * Compute the end coordinate of a parsed VCF record.
 *
 * When at least one ALT entry contains "<", INFO.END is set, and no ALT entry
 * is exactly "<TRA>", the result is the first INFO.END value converted to a
 * number. In every other case it is (POS - 1) plus the length of REF.
 *
 * @param {object} variant - A parsed VCF record; POS, REF, ALT and (only for
 *   ALT entries containing "<") INFO are read. REF defaults to "" when
 *   undefined.
 *
 * @returns {number} The end coordinate.
 */
function k(variant) {
  const alts = variant.ALT;
  const hasTranslocation = alts?.includes("<TRA>");
  const hasSymbolicAllele = alts?.some((allele) => allele.includes("<"));
  if (hasSymbolicAllele) {
    const info = variant.INFO;
    if (info.END && !hasTranslocation) {
      return Number(info.END[0]);
    }
  }
  const ref = variant.REF === undefined ? "" : variant.REF;
  const start = variant.POS - 1;
  return start + ref.length;
}
/**
 * Feature wrapper around one parsed VCF record.
 *
 * Stores the record (`variant`), the parser it came from (`parser`), the
 * summary computed by V (`data`) and an identifier (`_id`).
 */
class q {
  /**
   * @param {object} args
   * @param {object} args.variant - The parsed VCF record.
   * @param {*} args.parser - Handed to V together with the record.
   * @param {*} args.id - Identifier reported by `id()` and `toJSON()`.
   */
  constructor(args) {
    this.variant = args.variant;
    this.parser = args.parser;
    this.data = V(this.variant, this.parser);
    this._id = args.id;
  }

  /**
   * Look up a field of the feature.
   *
   * @param {string} field - "samples" and "genotypes" invoke the lazy
   *   SAMPLES() / GENOTYPES() functions of the record; any other name is read
   *   from the computed summary first and from the record when the summary
   *   has no value (null or undefined) for it.
   *
   * @returns {*} The value found, or undefined.
   */
  get(field) {
    if (field === "samples") {
      return this.variant.SAMPLES();
    }
    if (field === "genotypes") {
      return this.variant.GENOTYPES();
    }
    return this.data[field] ?? this.variant[field];
  }

  /** Always returns undefined. */
  parent() {
  }

  /** Always returns undefined. */
  children() {
  }

  /** @returns {*} The identifier given to the constructor. */
  id() {
    return this._id;
  }

  /**
   * Plain-object form of the feature: `uniqueId`, every own enumerable
   * property of the record except the SAMPLES and GENOTYPES functions, the
   * computed summary, and the parsed samples under `samples`.
   *
   * @returns {object}
   */
  toJSON() {
    const recordFields = { ...this.variant };
    delete recordFields.SAMPLES;
    delete recordFields.GENOTYPES;
    return {
      uniqueId: this._id,
      ...recordFields,
      ...this.data,
      samples: this.variant.SAMPLES()
    };
  }
}
export {
Q as V,
q as a
};
//# sourceMappingURL=index-DYYPI5hS.js.map