UNPKG

ideogram

Version:

Chromosome visualization for the web

106 lines (89 loc) 3.29 kB
import {fetchAndParse, inspectWorker, getFullId} from './cache-lib'; /** Parse compressed protein to more easily computable format */ function deserializeProtein(rawProtein, domainKeys) { const domains = []; for (let i = 0; i < rawProtein.length; i++) { const rawDomain = rawProtein[i].split(';'); // const domainID = getFullId('IPR', rawDomain[0], 6); const domainName = domainKeys[rawDomain[0]]; const start = parseInt(rawDomain[1]); const length = parseInt(rawDomain[2]); const domain = [domainName, start, length]; domains.push(domain); } return domains; } /** Parse metainformation header lines, i.e. those beginning "## "" */ function parseMetainformationHeader(line) { const splitHead = line.split(' keys: '); if (splitHead.length < 2) return [null]; const metaHeader = splitHead[0].split('## ')[1]; const keys = {}; splitHead[1].split('; ').forEach(entry => { const splitEntry = entry.split(' = '); keys[splitEntry[0]] = splitEntry[1]; }); return [metaHeader, keys]; } /** Parse a protein cache TSV file, return array of useful transforms */ export function parseProteinCache(rawTsv, perfTimes) { const featuresByGene = {}; let t0 = performance.now(); const lines = rawTsv.split(/\r\n|\n/); perfTimes.rawTsvSplit = Math.round(performance.now() - t0); let domainKeys; t0 = performance.now(); let gene = null; let refTxBaseNum; for (let i = 0; i < lines.length; i++) { const line = lines[i]; if (line === '') continue; // Skip empty lines // Parse header if (line[0] === '#') { // Parse metainformation headers if (line[1] === '#') { const [metaHeader, keys] = parseMetainformationHeader(line); if (metaHeader === 'domain') { domainKeys = keys; } } continue; } const splitLine = line.trim().split(/\t/); let transcriptName = splitLine[0]; if (isNaN(transcriptName)) { const splitTxName = transcriptName.split('-'); const txNum = splitTxName.slice(-1)[0]; // e.g. 208 in ACE2-208 const highestDigit = parseInt(txNum[0]); // e.g. 2 in 208 const numDigits = txNum.length; refTxBaseNum = highestDigit * (10 ** (numDigits - 1)); gene = splitTxName.slice(0, -1).join('-'); } else { transcriptName = refTxBaseNum + parseInt(transcriptName); transcriptName = gene + '-' + transcriptName; } const rawProtein = splitLine.slice(1); const protein = deserializeProtein(rawProtein, domainKeys); // E.g. ACE2-201, <array of proteins <protein name, protein ID, start, length>> const feature = { transcriptName, protein }; if (gene in featuresByGene) { featuresByGene[gene].push(feature); } else { featuresByGene[gene] = [feature]; } }; const t1 = performance.now(); perfTimes.parseCacheLoop = Math.round(t1 - t0); return featuresByGene; } // Uncomment when workers work outside localhost // addEventListener('message', async event => { // console.time('proteinCacheWorker'); // const [cacheUrl, perfTimes, debug] = event.data; // const result = await fetchAndParse(cacheUrl, perfTimes, parseCache); // postMessage(result); // if (debug) inspectWorker('proteinStructure', result[0]); // });