UNPKG

ideogram

Version:

Chromosome visualization for the web

103 lines (85 loc) 3.08 kB
import {fetchAndParse, inspectWorker} from './cache-lib';

/**
 * Parse compressed feature subparts to more easily computable format.
 *
 * Each raw subpart is a `;`-delimited string of three fields: a numeric key
 * into `subpartKeys`, a start, and a length.
 *
 * @param {string[]} rawSubparts Compressed subparts, e.g. `['0;0;50']`
 * @param {Object} subpartKeys Lookup from numeric key to subpart type
 * @returns {Array[]} One `[subpartType, start, length]` triple per subpart
 */
function deserializeSubparts(rawSubparts, subpartKeys) {
  return rawSubparts.map(rawSubpart => {
    const [rawTypeKey, rawStart, rawLength] = rawSubpart.split(';');

    // Always parse as base 10, so a field is never read with another radix
    const subpartType = subpartKeys[parseInt(rawTypeKey, 10)];
    const start = parseInt(rawStart, 10);
    const length = parseInt(rawLength, 10);

    return [subpartType, start, length];
  });
}

/**
 * Parse metainformation header lines, i.e. those beginning "## ".
 *
 * Expected form: `## <name> keys: <key> = <value>, <key> = <value>, ...`
 *
 * @param {string} line A header line that starts with "##"
 * @returns {Array} `[metaHeader, keys]`, or `[null]` if the line has no
 *   ` keys: ` section
 */
function parseMetainformationHeader(line) {
  const splitHead = line.split(' keys: ');
  if (splitHead.length < 2) return [null];

  const metaHeader = splitHead[0].split('## ')[1];

  const keys = {};
  for (const entry of splitHead[1].split(', ')) {
    const [key, value] = entry.split(' = ');
    keys[key] = value;
  }

  return [metaHeader, keys];
}

/**
 * Derive the gene name from a feature name by dropping the final
 * hyphen-delimited suffix, e.g. "ACE2-201" -> "ACE2".
 *
 * A name without any hyphen is returned unchanged, so such features are
 * keyed by their own name rather than all collapsing under an empty key.
 */
function getGeneName(name) {
  const lastHyphen = name.lastIndexOf('-');
  return lastHyphen === -1 ? name : name.slice(0, lastHyphen);
}

/**
 * Parse a gene structure cache TSV file, return array of useful transforms.
 *
 * Lines starting with "##" are metainformation headers; those named
 * "biotype" and "subpart" supply the lookup tables used to decompress data
 * lines. Other lines starting with "#" and empty lines are skipped. Data
 * lines are tab-delimited: name, start offset, compressed biotype, strand,
 * then zero or more compressed subparts.
 *
 * The lookup tables are only set once their header line has been read, so
 * the "biotype" and "subpart" headers must precede the first data line.
 *
 * @param {string} rawTsv Contents of the cache file
 * @param {Object} perfTimes Mutated in place: `rawTsvSplit` and
 *   `parseCacheLoop` are set to elapsed times in rounded milliseconds
 * @returns {Object} Map of gene name to an array of features, each of form
 *   `{name, startOffset, biotype, strand, subparts}`
 */
export function parseGeneStructureCache(rawTsv, perfTimes) {
  const featuresByGene = {};

  let t0 = performance.now();
  const lines = rawTsv.split(/\r\n|\n/);
  perfTimes.rawTsvSplit = Math.round(performance.now() - t0);

  let biotypeKeys;
  let subpartKeys;

  t0 = performance.now();
  for (const line of lines) {
    if (line === '') continue; // Skip empty lines

    // Parse header
    if (line[0] === '#') {
      // Parse metainformation headers
      if (line[1] === '#') {
        const [metaHeader, keys] = parseMetainformationHeader(line);
        if (metaHeader === 'biotype') {
          biotypeKeys = keys;
        } else if (metaHeader === 'subpart') {
          subpartKeys = keys;
        }
      }
      continue;
    }

    const splitLine = line.trim().split(/\t/);
    const [name, rawStartOffset, biotypeCompressed, strand] = splitLine;

    const startOffset = parseInt(rawStartOffset, 10);
    const gene = getGeneName(name);

    const rawSubparts = splitLine.slice(4);
    const subparts = deserializeSubparts(rawSubparts, subpartKeys);

    const biotype = biotypeKeys[biotypeCompressed];

    // E.g. ACE2-201, protein_coding, -, <array of exon or UTR arrays>
    const feature = {name, startOffset, biotype, strand, subparts};

    // Own-property check: a bare `in` would also match keys inherited from
    // Object.prototype (e.g. a gene named "constructor")
    if (Object.prototype.hasOwnProperty.call(featuresByGene, gene)) {
      featuresByGene[gene].push(feature);
    } else {
      featuresByGene[gene] = [feature];
    }
  }
  const t1 = performance.now();
  perfTimes.parseCacheLoop = Math.round(t1 - t0);

  return featuresByGene;
}

// Uncomment when workers work outside localhost
// NOTE(review): `parseCache` below is not defined in this file; presumably
// it should be `parseGeneStructureCache` -- confirm before uncommenting.
// addEventListener('message', async event => {
//   console.time('geneStructureCacheWorker');
//   const [cacheUrl, perfTimes, debug] = event.data;
//   const result = await fetchAndParse(cacheUrl, perfTimes, parseCache);
//   postMessage(result);
//   if (debug) inspectWorker('geneStructure', result[0]);
// });