molstar
Version:
A comprehensive macromolecular library.
106 lines (105 loc) • 4.26 kB
JavaScript
/**
* Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
/**
 * Specification tokens that may introduce a value in a PDB COMPND record.
 *
 * Within this file the object serves as a lookup set: only the keys are
 * consulted, the values are empty placeholders.
 */
const Spec = {
    MOL_ID: '',
    MOLECULE: '',
    CHAIN: '',
    FRAGMENT: '',
    SYNONYM: '',
    EC: '',
    ENGINEERED: '',
    MUTATION: '',
    OTHER_DETAILS: ''
};
/**
 * Parse PDB COMPND records into entity descriptions, one per chain.
 *
 * @param {{data: string, indices: ArrayLike<number>}} lines Tokenized lines; line `n` is
 *     `data.substring(indices[2 * n], indices[2 * n + 1])`.
 * @param {number} lineStart Index of the first COMPND line (inclusive).
 * @param {number} lineEnd Index past the last COMPND line (exclusive).
 * @returns {{description: string, chains: string[]}[]} One entry per chain ID listed in a
 *     CHAIN specification, carrying the MOLECULE text of the compound it belongs to.
 */
export function parseCmpnd(lines, lineStart, lineEnd) {
    const getLine = (n) => lines.data.substring(lines.indices[2 * n], lines.indices[2 * n + 1]);
    let currentSpec;
    // Absorbs MOLECULE/CHAIN values seen before the first MOL_ID; this initial
    // object is never added to `compounds`.
    let currentCompound = { chains: [], description: '' };
    const compounds = [];
    for (let i = lineStart; i < lineEnd; i++) {
        const line = getLine(i);
        // COLUMNS       DATA TYPE       FIELD         DEFINITION
        // ----------------------------------------------------------------------------------
        //  1 -  6       Record name     "COMPND"
        //  8 - 10       Continuation    continuation  Allows concatenation of multiple records.
        // 11 - 80       Specification   compound      Description of the molecular components.
        //               list
        const cmpnd = line.slice(10, 80).trim();
        const cmpndSpecEnd = cmpnd.indexOf(':');
        const cmpndSpec = cmpnd.substring(0, cmpndSpecEnd);
        let value;
        // Own-property check so that inherited names (e.g. "constructor") are
        // not mistaken for specification tokens.
        if (Object.prototype.hasOwnProperty.call(Spec, cmpndSpec)) {
            currentSpec = cmpndSpec;
            // Tolerate any amount of whitespace (including none) after the colon.
            value = cmpnd.substring(cmpndSpecEnd + 1).trim();
        }
        else {
            // No recognized token: the line continues the current specification.
            value = cmpnd;
        }
        value = value.replace(/;$/, '');
        if (currentSpec === 'MOL_ID') {
            // Each MOL_ID starts a new compound.
            currentCompound = {
                chains: [],
                description: ''
            };
            compounds.push(currentCompound);
        }
        else if (currentSpec === 'MOLECULE') {
            if (currentCompound.description)
                currentCompound.description += ' ';
            currentCompound.description += value;
        }
        else if (currentSpec === 'CHAIN') {
            // A chain list wrapped onto the next line ends with a trailing comma;
            // skip the resulting empty tokens instead of recording '' as a chain.
            for (const token of value.split(',')) {
                const chainId = token.trim();
                if (chainId)
                    currentCompound.chains.push(chainId);
            }
        }
    }
    // Define a separate entity for each chain
    // ---------------------------------------
    //
    // This is a workaround for how sequences are currently determined for PDB files.
    //
    // The current approach infers the "observed sequence" from the atomic hierarchy.
    // However, for example for PDB ID 3HHR, this approach fails, since chains B and C
    // belong to the same entity but contain different observed sequence, which causes display
    // errors in the sequence viewer (since the sequences are determined "per entity").
    //
    // A better approach could be to parse SEQRES categories and use it to construct
    // entity_poly_seq category. However, this would require constructing label_seq_id (with gaps)
    // from RES ID pdb column (auth_seq_id), which isn't a trivial exercise.
    //
    // (properly formatted) mmCIF structures do not exhibit this issue.
    const singletons = [];
    for (const comp of compounds) {
        for (const chain of comp.chains) {
            singletons.push({
                description: comp.description,
                chains: [chain]
            });
        }
    }
    return singletons;
}
/**
 * Collect the chemical names given by PDB HETNAM records.
 *
 * @param {{data: string, indices: ArrayLike<number>}} lines Tokenized lines; line `n` is
 *     `data.substring(indices[2 * n], indices[2 * n + 1])`.
 * @param {number} lineStart Index of the first HETNAM line (inclusive).
 * @param {number} lineEnd Index past the last HETNAM line (exclusive).
 * @returns {Map<string, string>} Het identifier -> chemical name. When an identifier
 *     occurs on several lines, the name pieces are joined with a single space.
 */
export function parseHetnam(lines, lineStart, lineEnd) {
    const namesByHetId = new Map();
    for (let row = lineStart; row < lineEnd; row += 1) {
        const record = lines.data.substring(lines.indices[2 * row], lines.indices[2 * row + 1]);
        // COLUMNS       DATA TYPE       FIELD         DEFINITION
        // ----------------------------------------------------------------------------
        //  1 -  6       Record name     "HETNAM"
        //  9 - 10       Continuation    continuation  Allows concatenation of multiple records.
        // 12 - 14       LString(3)      hetID         Het identifier, right-justified.
        // 16 - 70       String          text          Chemical name.
        const hetId = record.slice(11, 14).trim();
        const text = record.slice(15).trim();
        if (!namesByHetId.has(hetId)) {
            namesByHetId.set(hetId, text);
            continue;
        }
        // Identifier seen before: append this piece to the name collected so far.
        namesByHetId.set(hetId, `${namesByHetId.get(hetId)} ${text}`);
    }
    return namesByHetId;
}