ideogram
Version:
Chromosome visualization for the web
286 lines (245 loc) • 8.95 kB
JavaScript
/**
* @fileoverview Functions to render 2D protein structure, i.e. linear domains
*
* The protein diagrams are shown in the Gene Leads tooltip.
*/
import {addPositions, getGeneFromStructureName, pipe} from './gene-structure';
import {getColors} from './protein-color';
/**
 * Get subtle lines to visually demarcate the boundaries of a feature
 *
 * Left and right borders are always drawn.  Top and bottom borders are
 * drawn only when `addTopBottom` is true.
 *
 * @param x Left edge of feature
 * @param y Top edge of feature
 * @param width Added to `x` to get right edge
 * @param baseHeight Added to `y` to get bottom edge
 * @param lineColor Value for `stroke` attribute of each line
 * @param addTopBottom Whether to also draw top and bottom borders
 * @return {String} Concatenated `<line>` markup, ordered left, right,
 *   then (if requested) top, bottom
 */
function getFeatureBorderLines(
  x, y, width, baseHeight, lineColor, addTopBottom=false
) {
  const right = x + width;
  const bottom = y + baseHeight;

  // Every border is a single straight <line> between two points
  const toLine = (x1, x2, y1, y2) => {
    const attrs =
      `x1="${x1}" x2="${x2}" y1="${y1}" y2="${y2}" stroke="${lineColor}"`;
    return `<line class="subpart-line" ${attrs} />`;
  };

  const borders = [
    toLine(x, x, y, bottom), // Left
    toLine(right, right, y, bottom) // Right
  ];

  if (addTopBottom) {
    borders.push(toLine(x, right, y, y)); // Top
    borders.push(toLine(x, right, bottom, bottom)); // Bottom
  }

  return borders.join('');
}
/**
 * Get start and length for coding sequence (CDS), in pixels and base pairs
 *
 * Each subpart is read as an array with type at index 0, start (bp) at
 * index 1, length (bp) at index 2, and an object with pixel `x` and `width`
 * as its last element.
 *
 * For negative-strand transcripts, a reversed copy of `subparts` is
 * examined and the roles of 5'-UTR and 3'-UTR are swapped.  The input array
 * is not modified.
 *
 * NOTE(review): The UTR that ends the CDS is meant to be the "first" UTR of
 * its type, but like the start UTR it is picked as the _last_ match in the
 * examined order.  Confirm this is intended for transcripts that have
 * multiple UTRs of the same type.
 *
 * @return {Object} `{px: {start, length}, bp: {start, length}}`
 */
function getCdsCoordinates(subparts, isPositiveStrand) {
  // Test case: XRCC3 (-, multiple 5'-UTRs), RAD51D (big 3'-UTR)
  const ordered = isPositiveStrand ? subparts : subparts.slice().reverse();

  const [startType, stopType] =
    isPositiveStrand ? ["5'-UTR", "3'-UTR"] : ["3'-UTR", "5'-UTR"];

  // Small accessors for the positional subpart format
  const pxOf = subpart => subpart.slice(-1)[0];
  const endPxOf = subpart => {
    const {x, width} = pxOf(subpart);
    return x + width;
  };
  const endBpOf = subpart => subpart[1] + subpart[2];
  const lastOfType = type => {
    return ordered.filter(subpart => subpart[0] === type).slice(-1)[0];
  };

  const startUtr = lastOfType(startType);
  const stopUtr = lastOfType(stopType);

  // CDS starts where the start UTR ends.  Transcripts without an annotated
  // start UTR (e.g. EGFR-205) have CDS start at 0.
  const startPx = startUtr ? endPxOf(startUtr) : 0;
  const startBp = startUtr ? endBpOf(startUtr) : 0;

  let stopPx;
  let stopBp;
  if (stopUtr) {
    // CDS stops where the end UTR starts
    stopPx = pxOf(stopUtr).x;
    stopBp = stopUtr[1];
  } else {
    // Transcripts without an annotated end UTR (e.g. EGFR-205) have CDS
    // stop at the end of the last subpart
    const lastSubpart = ordered.slice(-1)[0];
    stopPx = endPxOf(lastSubpart);
    stopBp = endBpOf(lastSubpart);
  }

  return {
    px: {start: startPx, length: stopPx - startPx},
    bp: {start: startBp, length: stopBp - startBp}
  };
}
/** Return whether feature's type (its first element) begins with "_" */
function isTopologyFeature(feature) {
  const featureType = feature[0];
  const firstCharacter = featureType[0];
  return firstCharacter === '_';
}
/** Return whether feature's type (its first element) is exactly "S" */
function isSignalPeptideFeature(feature) {
  const featureType = feature[0];
  return featureType === 'S';
}
/** Abbreviated topology feature types, mapped to their expanded names */
const topologyFeatureMap = {
  '_H': 'Helical',
  '_E': 'Extracellular',
  '_C': 'Cytoplasmic'
};

/**
 * Expand a compressed topology feature type
 *
 * Known abbreviations (e.g. "_H") are looked up in `topologyFeatureMap`.
 * Any other value has its first character dropped, e.g. "_Lumenal" becomes
 * "Lumenal".
 *
 * @param {String} feature Compressed feature type
 * @return {String} Expanded feature type
 */
function decompressTopologyFeature(feature) {
  // Check own keys only.  The `in` operator also matches inherited keys
  // like "__proto__", which would return a non-string value here.
  const isAbbreviation =
    Object.prototype.hasOwnProperty.call(topologyFeatureMap, feature);

  if (isAbbreviation) {
    return topologyFeatureMap[feature];
  }
  return feature.slice(1);
}
/**
 * Get SVG for an individual protein feature, e.g. a domain
 *
 * The feature's type is read from its first element, its length in amino
 * acids from its third element, and its pixel layout (`x`, `width`) from its
 * last element.
 *
 * @param {Array} feature Protein feature
 * @param {Object} cds CDS coordinates; `cds.px.start` and `cds.px.length`
 *   are read
 * @param isPositiveStrand Whether transcript is on positive strand; on
 *   negative strand, feature is mirrored within the CDS
 * @param hasTopology Whether to use the layout that makes room for
 *   topology features
 * @return {String} `<rect>` markup followed by border lines, or an empty
 *   string if a topology feature ends up with negative width
 */
function getFeatureSvg(feature, cds, isPositiveStrand, hasTopology) {
  const px = feature.slice(-1)[0];
  const featureEndPx = px.x + px.width;

  let label = feature[0];
  let width = px.width;
  let x = isPositiveStrand ?
    cds.px.start + px.x :
    cds.px.length + cds.px.start - featureEndPx;

  const isTopology = isTopologyFeature(feature);
  const isSignal = isSignalPeptideFeature(feature);

  // Perhaps make these configurable, later
  let y = hasTopology ? 38 : 30;
  let height = 14;
  let topoAttr = '';

  if (hasTopology && isTopology) {
    label = decompressTopologyFeature(feature[0]);
    y = 30;
    height = 30;
    topoAttr = 'data-topology="true"';

    const featureDigest = `${feature[0]} ${feature[1]} ${feature[2]}`;

    // Truncate features that extend more than 3 px beyond CDS length.
    // Applies to either strand, e.g.:
    // - EGF-206 alternative isoform, C-terminal cytoplasmic domain (+)
    // - SCARB1-201 canonical isoform, C-terminal cytoplasmic domain (-)
    if (featureEndPx > cds.px.length + 3) {
      console.debug(`Truncate protein topology feature: ${featureDigest}`);
      width -= featureEndPx - cds.px.length;
      if (!isPositiveStrand) {
        x += width;
      }
    }

    if (width < 0) {
      // E.g. LDLR-202 alternative isoform, multiple features
      const issue = 'Width < 0, omit protein topology feature';
      console.debug(`${issue}: ${featureDigest}`);
      return '';
    }
  }

  // Colors are looked up before label is made more readable below
  const [color, lineColor] = getColors(label);

  if (isSignal) {
    label = 'Signal peptide';
    height = 8;
    y += 3;
  }

  if (isTopology) {
    const namedHelixPrefix = 'Helical --- Name=';
    if (label === 'Helical') {
      label = 'Transmembrane';
    } else if (label.startsWith(namedHelixPrefix)) {
      label = label.replace(namedHelixPrefix, 'Transmembrane: ');
    }
  }

  // Topology features get no top or bottom border
  const borders =
    getFeatureBorderLines(x, y, width, height, lineColor, !isTopology);

  const subpartAttr = `data-subpart="${label} ${pipe} ${feature[2]} aa"`;
  const posAttrs = `x="${x}" width="${width}" y="${y}" height="${height}"`;
  const classAttr = isTopology ?
    'class="subpart domain topology" ' :
    'class="subpart domain" ';

  const rect =
    `<rect ${classAttr} rx="1.5" fill="${color}" ` +
    `${posAttrs} ${subpartAttr} ${topoAttr}/>`;

  return rect + borders;
}
/**
 * Return whether protein SVG should be shown
 *
 * All of the following must hold:
 * - `ideo.config.showProteinInTooltip` is truthy
 * - `Ideogram` has a `proteinCache` that includes `gene`
 * - `Ideogram` has a `spliceExons` property that is not `false`
 *
 * If `showProteinInTooltip` is falsy, that falsy value itself is returned,
 * and `Ideogram` is not consulted.
 */
function isEligibleforProteinSvg(gene, ideo) {
  return (
    ideo.config.showProteinInTooltip &&
    'proteinCache' in Ideogram &&
    gene in Ideogram.proteinCache &&
    'spliceExons' in Ideogram &&
    Ideogram.spliceExons !== false
  );
}
/**
 * Get SVG rectangle for the protein line, spanning the CDS in pixels
 *
 * The rectangle is drawn lower and darker when the topology layout is used.
 *
 * @param {Object} cds CDS coordinates; `cds.px.start` and `cds.px.length`
 *   are read
 * @param hasTopology Whether the topology layout is used
 * @return {String} `<rect>` markup
 */
function getProteinRect(cds, hasTopology) {
  const y = hasTopology ? '43' : '35';
  const fill = hasTopology ? 'BBB' : 'DDD';
  const stroke = hasTopology ? '555' : '777';

  // Each attribute ends with a space, so adjacent attributes never fuse
  // together (previously `class` ran directly into `x`)
  const proteinRect =
    `<rect class="_ideoProteinLine" ` +
    `x="${cds.px.start}" width="${cds.px.length}" ` +
    `y="${y}" height="4" ` +
    `fill="#${fill}" ` +
    `stroke="#${stroke}" ` +
    `/>`;

  return proteinRect;
}
/**
 * Determine if any protein isoforms for this gene have topology features
 *
 * Helps ensure transcripts can be rapidly navigated via arrows in dropdown
 * menu, even when some proteins of the gene have topology and some do not.
 *
 * Example: LDLR
 *
 * @param {String} gene Key into `Ideogram.proteinCache`
 * @param ideo Not used here
 * @return {Boolean|undefined} Whether any cached entry for the gene has a
 *   topology feature; `undefined` if the protein cache is absent or has no
 *   entries for the gene
 */
export function getHasTopology(gene, ideo) {
  // The protein cache may not exist on `Ideogram`; treat that like a gene
  // that is missing from the cache, rather than throwing
  const entries = Ideogram.proteinCache?.[gene];

  const hasTopology = entries?.some(entry => {
    return entry.protein.some(feature => isTopologyFeature(feature));
  });

  return hasTopology;
}
/**
 * Get SVG showing 2D protein features, e.g. domains from InterPro
 *
 * @param {String} structureName Transcript name, matched against
 *   `transcriptName` of the gene's entries in `Ideogram.proteinCache`
 * @param {Array} subparts Transcript subparts, used to get CDS coordinates
 *   and to position protein features
 * @param isPositiveStrand Whether transcript is on positive strand
 * @param hasTopology Whether to use the layout that makes room for
 *   topology features
 * @param {Object} ideo Ideogram instance
 * @return {Array} `[proteinSvg, proteinLengthAa]`.  If protein SVG is
 *   ineligible or no entry matches `structureName`, this is
 *   `['<br/>', null]`.  `proteinLengthAa` is currently always `null`.
 */
export function getProtein(
  structureName, subparts, isPositiveStrand, hasTopology, ideo
) {
  const gene = getGeneFromStructureName(structureName, ideo);
  if (!isEligibleforProteinSvg(gene, ideo)) {
    return ['<br/>', null];
  }

  const entry = Ideogram.proteinCache[gene].find(
    ({transcriptName}) => transcriptName === structureName
  );
  if (!entry) {
    return ['<br/>', null];
  }

  const cds = getCdsCoordinates(subparts, isPositiveStrand);

  // Number of amino acids in protein
  //
  // Some principles of molecular biology:
  // - Coding sequence (CDS) of mRNA specifies amino acids comprising protein
  // - Each amino acid is specified by 3 nucleotides (i.e. codon; 3 nt / aa)
  // - 1 codon -- the stop codon -- is not part of protein
  //
  // So the length would be Math.floor(cds.bp.length/3) - 1, but that is not
  // yet computed.
  //
  // TODO: account for phase
  const proteinLengthAa = null;

  // Render each feature, keeping topology features separate from the rest
  const topologySvgs = [];
  const featureSvgs = [];
  for (const domain of addPositions(subparts, entry.protein)) {
    const target = isTopologyFeature(domain) ? topologySvgs : featureSvgs;
    target.push(getFeatureSvg(domain, cds, isPositiveStrand, hasTopology));
  }

  // Later SVG elements are drawn in front of earlier ones, so order as:
  // protein topology features in back, then unannotated protein, then
  // protein domains, sites, etc. in front
  const proteinRect = getProteinRect(cds, hasTopology);
  const layers = [...topologySvgs, proteinRect, ...featureSvgs];

  const proteinSvg = `<g id="_ideoProtein">${layers.join('')}</g>`;
  return [proteinSvg, proteinLengthAa];
}