UNPKG

taxonium-component

Version:

React component for exploring large phylogenetic trees in the browser

627 lines (626 loc) 17.3 kB
import { bA as O } from "./JBrowsePanel-uJIA-L6s.js";

/**
 * Choose the column separator for a VCF header or data line.
 *
 * VCF columns are tab-delimited. When the text contains no tab at all, a
 * single space is used instead so that space-separated input keeps working.
 *
 * @param {string} text - A header column line, a data line, or the sample
 *   portion of a data line.
 * @returns {string} `"\t"` if `text` contains a tab, otherwise `" "`.
 */
function fieldDelimiter(text) {
  return text.includes("\t") ? "\t" : " ";
}

/**
 * Extract only the raw GT string of every sample, without parsing the other
 * FORMAT values.
 *
 * @param {string} format - The FORMAT column, e.g. `"GT:DP:GQ"`.
 * @param {string} rest - Everything after the FORMAT column (sample columns).
 * @param {string[]} samples - Sample names, in column order.
 * @param {string} [delimiter] - Column separator; detected from `rest` when
 *   omitted.
 * @returns {Object<string, string|undefined>} Map of sample name to GT string.
 *   Empty when FORMAT has no `GT` key; a sample whose column is missing maps
 *   to `undefined`.
 */
function parseGenotypesOnly(format, rest, samples, delimiter) {
  const genotypes = {};
  const formatKeys = format.split(":");
  // Match the exact key, not a substring: "AGT" or "GTX" is not "GT".
  const gtIndex = formatKeys.indexOf("GT");
  if (gtIndex === -1) {
    return genotypes;
  }

  const columns = rest.split(delimiter ?? fieldDelimiter(rest));
  samples.forEach((sample, i) => {
    const column = columns[i];
    if (column === undefined) {
      // Fewer sample columns than header samples.
      genotypes[sample] = undefined;
    } else if (formatKeys.length === 1) {
      // GT is the only key, so the whole column is the genotype.
      genotypes[sample] = column;
    } else if (gtIndex === 0) {
      // Fast path: GT is first, cut at the first colon without a full split.
      const colon = column.indexOf(":");
      genotypes[sample] = colon === -1 ? column : column.slice(0, colon);
    } else {
      genotypes[sample] = column.split(":")[gtIndex];
    }
  });
  return genotypes;
}

/**
 * Split the inside of a structured metadata value (`ID=DP,Number=1,...`) on
 * commas, ignoring commas inside double quotes or inside `[...]` lists.
 *
 * Brackets are only tracked outside of quotes, so a stray `[` or `]` inside a
 * quoted Description cannot disable splitting of the fields that follow.
 *
 * @param {string} text - Metadata value with the surrounding `<`/`>` removed.
 * @returns {string[]} Trimmed `key=value` fragments.
 */
function splitMetaFields(text) {
  const fields = [];
  let current = "";
  let inQuotes = false;
  let inBrackets = false;
  for (const ch of text) {
    if (ch === '"') {
      inQuotes = !inQuotes;
    } else if (!inQuotes && ch === "[") {
      inBrackets = true;
    } else if (!inQuotes && ch === "]") {
      inBrackets = false;
    } else if (ch === "," && !inQuotes && !inBrackets) {
      fields.push(current.trim());
      current = "";
      continue;
    }
    current += ch;
  }
  if (current) {
    fields.push(current.trim());
  }
  return fields;
}

/**
 * Split `text` at the first occurrence of `separator`.
 *
 * @param {string} text - String to split.
 * @param {string} separator - Single-character separator.
 * @returns {[string, string|undefined]} `[before, after]`, or
 *   `[text, undefined]` when the separator does not occur.
 */
function splitFirst(text, separator) {
  const at = text.indexOf(separator);
  if (at === -1) {
    // Without this guard slice(0, -1) would silently drop the last character.
    return [text, undefined];
  }
  return [text.slice(0, at), text.slice(at + 1)];
}

/**
 * Parse a structured metadata value such as
 * `<ID=DP,Number=1,Type=Integer,Description="Depth">` into a plain object.
 *
 * Values wrapped in `[...]` become arrays of trimmed strings, values wrapped
 * in double quotes are unquoted, and keys without `=` map to `undefined`.
 *
 * @param {string} metaString - The metadata value, with or without `<`/`>`.
 * @returns {Object<string, string|string[]|undefined>} Parsed key/value pairs.
 */
function parseMetaString(metaString) {
  const inner = metaString.replace(/^<|>$/g, "");
  return Object.fromEntries(
    splitMetaFields(inner).map((field) => {
      const [key, value] = splitFirst(field, "=");
      if (value === undefined) {
        return [key, undefined];
      }
      if (value.startsWith("[") && value.endsWith("]")) {
        return [
          key,
          value
            .slice(1, -1)
            .split(",")
            .map((item) => item.trim()),
        ];
      }
      if (value.startsWith('"') && value.endsWith('"')) {
        return [key, value.slice(1, -1)];
      }
      // Only one side quoted: strip whichever quote is present.
      return [key, value.replaceAll(/^"|"$/g, "")];
    }),
  );
}

/**
 * Built-in metadata used before any `##` header lines are applied. Header
 * lines with the same ID replace these entries.
 */
const defaultMetadata = {
  // INFO fields
  InfoFields: {
    // from the VCF4.3 spec, https://samtools.github.io/hts-specs/VCFv4.3.pdf
    AA: { Number: 1, Type: "String", Description: "Ancestral allele" },
    AC: {
      Number: "A",
      Type: "Integer",
      Description:
        "Allele count in genotypes, for each ALT allele, in the same order as listed",
    },
    AD: {
      Number: "R",
      Type: "Integer",
      Description: "Total read depth for each allele",
    },
    ADF: {
      Number: "R",
      Type: "Integer",
      Description: "Read depth for each allele on the forward strand",
    },
    ADR: {
      Number: "R",
      Type: "Integer",
      Description: "Read depth for each allele on the reverse strand",
    },
    AF: {
      Number: "A",
      Type: "Float",
      Description:
        "Allele frequency for each ALT allele in the same order as listed (estimated from primary data, not called genotypes)",
    },
    AN: {
      Number: 1,
      Type: "Integer",
      Description: "Total number of alleles in called genotypes",
    },
    BQ: { Number: 1, Type: "Float", Description: "RMS base quality" },
    // CIGAR strings are text (e.g. "3M1I"); a numeric type would turn every
    // value into NaN when the line is parsed.
    CIGAR: {
      Number: 1,
      Type: "String",
      Description:
        "Cigar string describing how to align an alternate allele to the reference allele",
    },
    DB: { Number: 0, Type: "Flag", Description: "dbSNP membership" },
    DP: {
      Number: 1,
      Type: "Integer",
      Description: "combined depth across samples",
    },
    // For precise variants, END is POS + length of REF allele - 1,
    // and for imprecise variants the corresponding best estimate.
    END: {
      Number: 1,
      Type: "Integer",
      Description: "End position (for use with symbolic alleles)",
    },
    H2: { Number: 0, Type: "Flag", Description: "HapMap2 membership" },
    H3: { Number: 0, Type: "Flag", Description: "HapMap3 membership" },
    // The VCF spec defines INFO/MQ as a Float.
    MQ: { Number: 1, Type: "Float", Description: "RMS mapping quality" },
    MQ0: {
      Number: 1,
      Type: "Integer",
      Description: "Number of MAPQ == 0 reads",
    },
    NS: {
      Number: 1,
      Type: "Integer",
      Description: "Number of samples with data",
    },
    SB: { Number: 4, Type: "Integer", Description: "Strand bias" },
    SOMATIC: {
      Number: 0,
      Type: "Flag",
      Description: "Somatic mutation (for cancer genomics)",
    },
    VALIDATED: {
      Number: 0,
      Type: "Flag",
      Description: "Validated by follow-up experiment",
    },
    "1000G": {
      Number: 0,
      Type: "Flag",
      Description: "1000 Genomes membership",
    },

    // specifically for structural variants
    IMPRECISE: {
      Number: 0,
      Type: "Flag",
      Description: "Imprecise structural variation",
    },
    NOVEL: {
      Number: 0,
      Type: "Flag",
      Description: "Indicates a novel structural variation",
    },
    // Value should be one of DEL, INS, DUP, INV, CNV, BND. This key can
    // be derived from the REF/ALT fields but is useful for filtering.
    SVTYPE: {
      Number: 1,
      Type: "String",
      Description: "Type of structural variant",
    },
    // One value for each ALT allele. Longer ALT alleles (e.g. insertions)
    // have positive values, shorter ALT alleles (e.g. deletions)
    // have negative values.
    SVLEN: {
      Number: null,
      Type: "Integer",
      Description: "Difference in length between REF and ALT alleles",
    },
    CIPOS: {
      Number: 2,
      Type: "Integer",
      Description: "Confidence interval around POS for imprecise variants",
    },
    CIEND: {
      Number: 2,
      Type: "Integer",
      Description: "Confidence interval around END for imprecise variants",
    },
    HOMLEN: {
      Type: "Integer",
      Description:
        "Length of base pair identical micro-homology at event breakpoints",
    },
    HOMSEQ: {
      Type: "String",
      Description:
        "Sequence of base pair identical micro-homology at event breakpoints",
    },
    // For precise variants, the consensus sequence the alternate allele assembly
    // is derivable from the REF and ALT fields. However, the alternate allele
    // assembly file may contain additional information about the characteristics
    // of the alt allele contigs.
    BKPTID: {
      Type: "String",
      Description: "ID of the assembled alternate allele in the assembly file",
    },
    MEINFO: {
      Number: 4,
      Type: "String",
      Description: "Mobile element info of the form NAME,START,END,POLARITY",
    },
    METRANS: {
      Number: 4,
      Type: "String",
      Description:
        "Mobile element transduction info of the form CHR,START,END,POLARITY",
    },
    DGVID: {
      Number: 1,
      Type: "String",
      Description: "ID of this element in Database of Genomic Variation",
    },
    DBVARID: {
      Number: 1,
      Type: "String",
      Description: "ID of this element in DBVAR",
    },
    DBRIPID: {
      Number: 1,
      Type: "String",
      Description: "ID of this element in DBRIP",
    },
    MATEID: {
      Number: null,
      Type: "String",
      Description: "ID of mate breakends",
    },
    PARID: {
      Number: 1,
      Type: "String",
      Description: "ID of partner breakend",
    },
    EVENT: {
      Number: 1,
      Type: "String",
      Description: "ID of event associated to breakend",
    },
    CILEN: {
      Number: 2,
      Type: "Integer",
      Description:
        "Confidence interval around the inserted material between breakend",
    },
    DPADJ: { Type: "Integer", Description: "Read Depth of adjacency" },
    CN: {
      Number: 1,
      Type: "Integer",
      Description: "Copy number of segment containing breakend",
    },
    CNADJ: {
      Number: null,
      Type: "Integer",
      Description: "Copy number of adjacency",
    },
    CICN: {
      Number: 2,
      Type: "Integer",
      Description: "Confidence interval around copy number for the segment",
    },
    CICNADJ: {
      Number: null,
      Type: "Integer",
      Description: "Confidence interval around copy number for the adjacency",
    },
  },

  // FORMAT fields
  GenotypeFields: {
    // from the VCF4.3 spec, https://samtools.github.io/hts-specs/VCFv4.3.pdf
    AD: {
      Number: "R",
      Type: "Integer",
      Description: "Read depth for each allele",
    },
    ADF: {
      Number: "R",
      Type: "Integer",
      Description: "Read depth for each allele on the forward strand",
    },
    ADR: {
      Number: "R",
      Type: "Integer",
      Description: "Read depth for each allele on the reverse strand",
    },
    DP: { Number: 1, Type: "Integer", Description: "Read depth" },
    EC: {
      Number: "A",
      Type: "Integer",
      Description: "Expected alternate allele counts",
    },
    FT: {
      Number: 1,
      Type: "String",
      Description: 'Filter indicating if this genotype was "called"',
    },
    GL: { Number: "G", Type: "Float", Description: "Genotype likelihoods" },
    GP: {
      Number: "G",
      Type: "Float",
      Description: "Genotype posterior probabilities",
    },
    GQ: {
      Number: 1,
      Type: "Integer",
      Description: "Conditional genotype quality",
    },
    GT: { Number: 1, Type: "String", Description: "Genotype" },
    HQ: { Number: 2, Type: "Integer", Description: "Haplotype quality" },
    MQ: { Number: 1, Type: "Integer", Description: "RMS mapping quality" },
    PL: {
      Number: "G",
      Type: "Integer",
      Description:
        "Phred-scaled genotype likelihoods rounded to the closest integer",
    },
    PQ: { Number: 1, Type: "Integer", Description: "Phasing quality" },
    PS: { Number: 1, Type: "Integer", Description: "Phase set" },
  },

  // ALT fields
  AltTypes: {
    DEL: { Description: "Deletion relative to the reference" },
    INS: {
      Description: "Insertion of novel sequence relative to the reference",
    },
    DUP: {
      Description: "Region of elevated copy number relative to the reference",
    },
    INV: { Description: "Inversion of reference sequence" },
    CNV: {
      Description:
        "Copy number variable region (may be both deletion and duplication)",
    },
    "DUP:TANDEM": { Description: "Tandem duplication" },
    "DEL:ME": {
      Description: "Deletion of mobile element relative to the reference",
    },
    "INS:ME": {
      Description: "Insertion of a mobile element relative to the reference",
    },
    NON_REF: {
      Description:
        "Represents any possible alternative allele at this location",
    },
    "*": {
      Description:
        "Represents any possible alternative allele at this location",
    },
  },

  // FILTER fields
  FilterTypes: {
    PASS: { Description: "Passed all filters" },
  },
};

/**
 * Percent-decode a value, returning it unchanged when it is not valid
 * percent-encoding (a literal `%` in a value is deliberately tolerated).
 *
 * @param {string} value - Possibly percent-encoded text.
 * @returns {string} Decoded text, or `value` itself if decoding fails.
 */
function decodePercent(value) {
  try {
    return decodeURIComponent(value);
  } catch {
    return value;
  }
}

/**
 * VCF parser: built from a VCF header, then used to parse data lines.
 * Exported from this module as `V`.
 */
class Q {
  /**
   * @param {Object} [args]
   * @param {string} [args.header] - Full VCF header text (the `##` metadata
   *   lines plus the `#CHROM` column line). LF and CRLF are both accepted.
   * @param {boolean} [args.strict=true] - When true, `parseLine` throws on a
   *   line that has no INFO column.
   * @throws {Error} If the header is empty, contains a line not starting with
   *   `#`, has no column line, or the first eight columns are not the
   *   standard VCF columns.
   */
  constructor({ header = "", strict = true } = {}) {
    if (!header.length) {
      throw new Error("empty header received");
    }
    const lines = header.split(/[\r\n]+/).filter(Boolean);
    if (!lines.length) {
      throw new Error("no non-empty header lines specified");
    }

    this.strict = strict;
    // Deep copy so header lines never mutate the shared defaults.
    this.metadata = JSON.parse(
      JSON.stringify({
        INFO: defaultMetadata.InfoFields,
        FORMAT: defaultMetadata.GenotypeFields,
        ALT: defaultMetadata.AltTypes,
        FILTER: defaultMetadata.FilterTypes,
      }),
    );

    let columnLine;
    for (const line of lines) {
      if (!line.startsWith("#")) {
        throw new Error(`Bad line in header: ${line}`);
      }
      if (line.startsWith("##")) {
        this.parseMetadata(line);
      } else {
        columnLine = line;
      }
    }
    if (!columnLine) {
      throw new Error("No format line found in header");
    }

    const trimmed = columnLine.trim();
    const columns = trimmed.split(fieldDelimiter(trimmed));
    const fixedColumns = columns.slice(0, 8);
    const expected = [
      "#CHROM",
      "POS",
      "ID",
      "REF",
      "ALT",
      "QUAL",
      "FILTER",
      "INFO",
    ];
    if (columns.length < 8) {
      throw new Error(`VCF header missing columns: ${columnLine}`);
    }
    if (
      fixedColumns.length !== expected.length ||
      !fixedColumns.every((name, i) => name === expected[i])
    ) {
      throw new Error(`VCF column headers not correct: ${columnLine}`);
    }
    // Column 9 is FORMAT; sample names start at column 10.
    this.samples = columns.slice(9);
  }

  /**
   * Parse every FORMAT value of every sample on a line.
   *
   * @param {string} format - The FORMAT column, e.g. `"GT:DP"`. When empty,
   *   an empty object is returned.
   * @param {string} rest - Everything after the FORMAT column.
   * @param {string} [delimiter] - Column separator; detected from `rest` when
   *   omitted.
   * @returns {Object<string, Object<string, Array|undefined>>} Map of sample
   *   name to `{ formatKey: values }`. Values are arrays split on `,`;
   *   Integer/Float keys are converted to numbers; `.` becomes `undefined`.
   */
  parseSamples(format, rest, delimiter) {
    const genotypes = {};
    if (!format) {
      return genotypes;
    }

    const columns = rest.split(delimiter ?? fieldDelimiter(rest));
    const formatKeys = format.split(":");
    const isNumeric = formatKeys.map((key) => {
      const type = this.getMetadata("FORMAT", key, "Type");
      return type === "Integer" || type === "Float";
    });

    for (let s = 0; s < this.samples.length; s++) {
      const sample = this.samples[s];
      genotypes[sample] = {};
      // A missing sample column is treated as an empty one rather than
      // crashing on `undefined.split`.
      const values = (columns[s] ?? "").split(":");
      // Ignore surplus values that have no FORMAT key to be stored under.
      const count = Math.min(values.length, formatKeys.length);
      for (let c = 0; c < count; c++) {
        const raw = values[c];
        genotypes[sample][formatKeys[c]] =
          raw === "" || raw === "."
            ? undefined
            : raw
                .split(",")
                .map((v) => (v === "." ? undefined : isNumeric[c] ? +v : v));
      }
    }
    return genotypes;
  }

  /**
   * Parse a VCF metadata line (i.e. a line that starts with "##") and add its
   * properties to the object.
   *
   * @param {string} line - A line from the VCF. Supports both LF and CRLF
   * newlines.
   * @throws {Error} If the line is not of the form `##key=value`.
   */
  parseMetadata(line) {
    const match = /^##(.+?)=(.*)/.exec(line.trim());
    if (!match) {
      throw new Error(`Line is not a valid metadata line: ${line}`);
    }
    const [key, value] = match.slice(1, 3);

    if (!value.startsWith("<")) {
      this.metadata[key] = value;
      return;
    }

    const [id, fields] = this.parseStructuredMetaVal(value);
    if (!id) {
      this.metadata[key] = fields;
      return;
    }
    // Only reuse an existing bucket if it is this object's own plain
    // container; an earlier unstructured `##key=text` line or an inherited
    // property (e.g. "toString") must not be written into.
    const bucket = Object.prototype.hasOwnProperty.call(this.metadata, key)
      ? this.metadata[key]
      : undefined;
    if (bucket === null || typeof bucket !== "object") {
      this.metadata[key] = {};
    }
    this.metadata[key][id] = fields;
  }

  /**
   * Parse a VCF header structured meta string (i.e. a meta value that starts
   * with "<ID=...")
   *
   * @param {string} metaVal - The VCF metadata value
   *
   * @returns {Array} - Array with two entries, 1) a string of the metadata ID
   * and 2) an object with the other key-value pairs in the metadata
   */
  parseStructuredMetaVal(metaVal) {
    const fields = parseMetaString(metaVal);
    const id = fields.ID;
    delete fields.ID;
    // "Number" stays a string for symbolic counts such as "A", "R", "G", ".".
    if ("Number" in fields && !Number.isNaN(Number(fields.Number))) {
      fields.Number = Number(fields.Number);
    }
    return [id, fields];
  }

  /**
   * Get metadata filtered by the elements in args. For example, can pass
   * ('INFO', 'DP') to only get info on a metadata tag that was like
   * "##INFO=<ID=DP,...>"
   *
   * @param {...string} args - List of metadata filter strings.
   *
   * @returns {any} An object, string, or number, depending on the filtering.
   *   Lookup stops at the first falsy value along the path and returns it.
   */
  getMetadata(...args) {
    let node = this.metadata;
    for (const key of args) {
      node = node[key];
      if (!node) {
        return node;
      }
    }
    return node;
  }

  /**
   * Parse a VCF line into an object like
   *
   * ```typescript
   * {
   *   CHROM: 'contigA',
   *   POS: 3000,
   *   ID: ['rs17883296'],
   *   REF: 'G',
   *   ALT: ['T', 'A'],
   *   QUAL: 100,
   *   FILTER: 'PASS',
   *   INFO: {
   *     NS: [3],
   *     DP: [14],
   *     AF: [0.5],
   *     DB: true,
   *     XYZ: ['5'],
   *   },
   *   SAMPLES: () => ({
   *     HG00096: {
   *       GT: ['0|0'],
   *       AP: ['0.000', '0.000'],
   *     }
   *   }),
   *   GENOTYPES: () => ({
   *     HG00096: '0|0'
   *   })
   * }
   * ```
   *
   * SAMPLES and GENOTYPES methods are functions instead of static data fields
   * because it avoids parsing the potentially long list of samples from e.g.
   * 1000 Genomes data unless requested.
   *
   * The SAMPLES function gives all info about the samples
   *
   * The GENOTYPES function only extracts the raw GT string if it exists, for
   * potentially optimized parsing by programs that need it
   *
   * @param {string} line - A string of a line from a VCF
   * @throws {Error} In strict mode, if the line has no INFO column.
   */
  parseLine(line) {
    const delimiter = fieldDelimiter(line);

    // Find the 9th separator so only the fixed columns are split eagerly;
    // the sample columns stay one unsplit string until requested.
    let end = 0;
    for (let seen = 0; end < line.length; end += 1) {
      if (line[end] === delimiter) {
        seen += 1;
        if (seen === 9) {
          break;
        }
      }
    }
    const fields = line.slice(0, end).split(delimiter);
    const rest = line.slice(end + 1);

    const [chrom, pos, id, ref, alt, qual, filter] = fields;
    const ID = id === "." ? undefined : id.split(";");
    const ALT = alt === "." ? undefined : alt.split(",");
    const QUAL = qual === "." ? undefined : +qual;
    const filters = filter === "." ? undefined : filter.split(";");
    const info = fields[7];
    const FORMAT = fields[8];

    if (this.strict && !info) {
      throw new Error(
        "no INFO field specified, must contain at least a '.' (turn off strict mode to allow)",
      );
    }

    // Only pay for percent-decoding when the column can contain an escape.
    const hasPercent = info?.includes("%");
    const INFO =
      info === undefined || info === "."
        ? {}
        : Object.fromEntries(
            info
              .split(";")
              // A leading/trailing/doubled ";" would otherwise add a "" key.
              .filter(Boolean)
              .map((entry) => {
                // Split on the first "=" only so values that contain "="
                // are kept whole.
                const [key, rawValue] = splitFirst(entry, "=");
                const values = rawValue
                  ?.split(",")
                  .map((v) => (v === "." ? undefined : v))
                  .map((v) => (v && hasPercent ? decodePercent(v) : v));
                const type = this.getMetadata("INFO", key, "Type");
                if (type === "Integer" || type === "Float") {
                  return [
                    key,
                    values?.map((v) => (v === undefined ? undefined : Number(v))),
                  ];
                }
                if (type === "Flag") {
                  return [key, true];
                }
                // Untyped key without a value is treated like a flag.
                return [key, values ?? true];
              }),
          );

    return {
      CHROM: chrom,
      POS: +pos,
      ALT,
      INFO,
      REF: ref,
      FILTER:
        filters && filters.length === 1 && filters[0] === "PASS"
          ? "PASS"
          : filters,
      ID,
      QUAL,
      FORMAT,
      SAMPLES: () => this.parseSamples(FORMAT ?? "", rest, delimiter),
      GENOTYPES: () =>
        parseGenotypesOnly(FORMAT ?? "", rest, this.samples, delimiter),
    };
  }
}

/**
 * Derive feature-style fields (0-based `start`, `end`, `refName`, ...) from a
 * parsed variant.
 *
 * NOTE(review): `O` is imported under a mangled name; from its use here it is
 * called with (REF, ALT, parser) and its result is destructured as
 * `[type, description]` — confirm against the JBrowsePanel chunk.
 *
 * @param {Object} variant - Object returned by `parseLine`.
 * @param {Q} parser - Parser that produced the variant.
 * @returns {{refName: string, start: number, end: number, description: *,
 *   type: *, name: (string|undefined)}}
 */
function featureData(variant, parser) {
  const { REF = "", ALT, POS, CHROM, ID } = variant;
  const [type, description] = O(REF, ALT, parser);
  return {
    refName: CHROM,
    start: POS - 1,
    end: getEnd(variant),
    description,
    type,
    name: ID?.join(","),
  };
}

/**
 * Compute the end coordinate of a variant.
 *
 * If any ALT allele is symbolic (contains `<`), `INFO.END` is present, and
 * none of the ALT alleles is `<TRA>`, the first `INFO.END` value is used.
 * Otherwise the end is `POS - 1 + REF.length`.
 *
 * @param {Object} variant - Object returned by `parseLine`.
 * @returns {number} End coordinate.
 */
function getEnd(variant) {
  const { POS, REF = "", ALT } = variant;
  const isTranslocation = ALT?.includes("<TRA>");
  const hasSymbolicAlt = ALT?.some((allele) => allele.includes("<"));
  if (hasSymbolicAlt) {
    const info = variant.INFO;
    if (info.END && !isTranslocation) {
      return +info.END[0];
    }
  }
  return POS - 1 + REF.length;
}

/**
 * Feature wrapper around a parsed VCF variant. Exported from this module
 * as `a`.
 */
class q {
  /**
   * @param {Object} args
   * @param {Object} args.variant - Object returned by `Q#parseLine`.
   * @param {Q} args.parser - Parser that produced the variant.
   * @param {string} args.id - Unique id for this feature.
   */
  constructor(args) {
    this.variant = args.variant;
    this.parser = args.parser;
    this.data = featureData(this.variant, this.parser);
    this._id = args.id;
  }

  /**
   * Read a field. `"samples"` and `"genotypes"` trigger the lazy sample
   * parsers; any other key is looked up in the derived feature data first and
   * falls back to the raw variant when that value is null or undefined.
   *
   * @param {string} field - Field name.
   * @returns {*} The field value.
   */
  get(field) {
    if (field === "samples") {
      return this.variant.SAMPLES();
    }
    if (field === "genotypes") {
      return this.variant.GENOTYPES();
    }
    return this.data[field] ?? this.variant[field];
  }

  /** No parent feature; always returns undefined. */
  parent() {}

  /** No child features; always returns undefined. */
  children() {}

  /** @returns {string} The id passed to the constructor. */
  id() {
    return this._id;
  }

  /**
   * Plain-object form of the feature: the variant fields (minus the lazy
   * SAMPLES/GENOTYPES functions), overlaid with the derived feature data,
   * plus `uniqueId` and the fully parsed `samples`.
   *
   * @returns {Object}
   */
  toJSON() {
    const { SAMPLES, GENOTYPES, ...fields } = this.variant;
    return {
      uniqueId: this._id,
      ...fields,
      ...this.data,
      samples: this.variant.SAMPLES(),
    };
  }
}

export { Q as V, q as a };
//# sourceMappingURL=index-DYYPI5hS.js.map