UNPKG

ideogram

Version:

Chromosome visualization for the web

115 lines (98 loc) 3.48 kB
import {fetchAndParse, getFullId, inspectWorker} from './cache-lib'; /** * Convert pre-annotation arrays to annotation objects * sorted by genomic position. */ function parseAnnots(preAnnots) { const chromosomes = {}; for (let i = 0; i < preAnnots.length; i++) { const [chromosome, start, stop, ensemblId, gene] = preAnnots[i]; if (!(chromosome in chromosomes)) { chromosomes[chromosome] = {chr: chromosome, annots: []}; } else { const annot = {name: gene, start, stop, ensemblId}; chromosomes[chromosome].annots.push(annot); } } const annotsSortedByPosition = {}; Object.entries(chromosomes).forEach(([chr, annotsByChr]) => { annotsSortedByPosition[chr] = { chr, annots: annotsByChr.annots.sort((a, b) => a.start - b.start) }; }); return annotsSortedByPosition; } /** Parse a gene cache TSV file, return array of useful transforms */ export function parseGeneCache(rawTsv, perfTimes) { const names = []; const nameCaseMap = {}; const namesById = {}; const fullNamesById = {}; const idsByName = {}; const lociByName = {}; const lociById = {}; const preAnnots = []; // If the gene has among top 2% expression in a tissue (per GTEx), that's // tracked here. const tissueIdsByName = {}; let ensemblPrefix; let t0 = performance.now(); const lines = rawTsv.split(/\r\n|\n/); perfTimes.rawTsvSplit = Math.round(performance.now() - t0); t0 = performance.now(); for (let i = 0; i < lines.length; i++) { const line = lines[i]; if (line === '') continue; // Skip empty lines if (line[0] === '#') { if (line.slice(0, 9) === '## prefix') { ensemblPrefix = line.split('prefix: ')[1]; } continue; } const [ chromosome, rawStart, rawLength, slimEnsemblId, gene, rawFullName, tissueIds ] = line.trim().split(/\t/); const fullName = decodeURIComponent(rawFullName); const start = parseInt(rawStart); const stop = start + parseInt(rawLength); const ensemblId = getFullId(ensemblPrefix, slimEnsemblId); preAnnots.push([chromosome, start, stop, ensemblId, gene, fullName]); const locus = [chromosome, start, stop]; names.push(gene); nameCaseMap[gene.toLowerCase()] = gene; namesById[ensemblId] = gene; fullNamesById[ensemblId] = fullName; idsByName[gene] = ensemblId; lociByName[gene] = locus; lociById[ensemblId] = locus; if (tissueIds !== undefined) { const processedTissueIds = []; const splitTissueIds = tissueIds.split(','); for (let i = 0; i < splitTissueIds.length; i++) { processedTissueIds.push(parseInt(splitTissueIds[i], 10)); } tissueIdsByName[gene] = processedTissueIds; } }; const t1 = performance.now(); perfTimes.parseCacheLoop = Math.round(t1 - t0); // const sortedAnnots = parseAnnots(preAnnots); perfTimes.parseAnnots = Math.round(performance.now() - t1); return [ names, nameCaseMap, namesById, fullNamesById, idsByName, lociByName, lociById // , sortedAnnots ]; } // Uncomment when workers work outside localhost // addEventListener('message', async event => { // console.time('geneCacheWorker'); // // console.log('in gene cache worker message handler'); // const [cacheUrl, perfTimes, debug] = event.data; // const result = await fetchAndParse(cacheUrl, perfTimes, parseCache); // postMessage(result); // if (debug) inspectWorker('gene', result[0]); // });