UNPKG

molstar

Version:

A comprehensive macromolecular library.

144 lines (143 loc) 6.71 kB
/**
 * Copyright (c) 2019-2026 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author Alexander Rose <alexander.rose@weirdbyte.de>
 */

/**
 * COMPND specification tokens recognised at the start of a record's text.
 * A Set is used so that only these exact tokens match (an object with the
 * `in` operator would also match inherited keys such as `constructor`).
 */
const Spec = new Set([
    'MOL_ID',
    'MOLECULE',
    'CHAIN',
    'FRAGMENT',
    'SYNONYM',
    'EC',
    'ENGINEERED',
    'MUTATION',
    'OTHER_DETAILS',
]);

/**
 * Returns the text of line `n` from a tokenized line buffer.
 *
 * @param {{ data: string, indices: ArrayLike<number> }} lines - `indices` holds
 *     a (start, end) offset pair into `data` for every line.
 * @param {number} n - Zero-based line index.
 * @returns {string} The substring of `lines.data` covering line `n`.
 */
function getTokenLine(lines, n) {
    return lines.data.substring(lines.indices[2 * n], lines.indices[2 * n + 1]);
}

/**
 * Parses COMPND records and returns one entry per chain.
 *
 * @param {{ data: string, indices: ArrayLike<number> }} lines - Tokenized lines.
 * @param {number} lineStart - Index of the first COMPND line (inclusive).
 * @param {number} lineEnd - Index after the last COMPND line (exclusive).
 * @returns {{ description: string, chains: string[] }[]} One object per chain,
 *     each carrying the MOLECULE description of the compound it belongs to.
 */
export function parseCmpnd(lines, lineStart, lineEnd) {
    let currentSpec;
    let currentCompound = { chains: [], description: '' };
    const compounds = [];

    for (let i = lineStart; i < lineEnd; i++) {
        const line = getTokenLine(lines, i);
        // COLUMNS       DATA TYPE       FIELD         DEFINITION
        // ----------------------------------------------------------------------------------
        //  1 -  6       Record name     "COMPND"
        //  8 - 10       Continuation    continuation  Allows concatenation of multiple records.
        // 11 - 80       Specification   compound      Description of the molecular components.
        //               list
        const cmpnd = line.substring(10, 80).trim();
        const cmpndSpecEnd = cmpnd.indexOf(':');
        // When there is no ':' this is '' and therefore never a known token.
        const cmpndSpec = cmpnd.substring(0, cmpndSpecEnd);

        let value;
        if (Spec.has(cmpndSpec)) {
            currentSpec = cmpndSpec;
            // Skip the colon and trim, instead of assuming exactly one space
            // follows it (which would drop a character for e.g. "CHAIN:A").
            value = cmpnd.substring(cmpndSpecEnd + 1).trim();
        } else {
            // Continuation of the previous specification.
            value = cmpnd;
        }
        value = value.replace(/;$/, '');

        if (currentSpec === 'MOL_ID') {
            currentCompound = { chains: [], description: '' };
            compounds.push(currentCompound);
        } else if (currentSpec === 'MOLECULE') {
            if (currentCompound.description) currentCompound.description += ' ';
            currentCompound.description += value;
        } else if (currentSpec === 'CHAIN') {
            // A chain list wrapped onto a continuation record ends with a
            // trailing comma; drop the empty items this produces so that no
            // entity with an empty chain id is created.
            for (const item of value.split(',')) {
                const chainId = item.trim();
                if (chainId) currentCompound.chains.push(chainId);
            }
        }
    }

    // Define a separate entity for each chain
    // ---------------------------------------
    //
    // This is a workaround for how sequences are currently determined for PDB files.
    //
    // The current approach infers the "observed sequence" from the atomic hierarchy.
    // However, for example for PDB ID 3HHR, this approach fails, since chains B and C
    // belong to the same entity but contain different observed sequence, which causes display
    // errors in the sequence viewer (since the sequences are determined "per entity").
    //
    // A better approach could be to parse SEQRES categories and use it to construct
    // entity_poly_seq category. However, this would require constructing label_seq_id (with gaps)
    // from RES ID pdb column (auth_seq_id), which isn't a trivial exercise.
    //
    // (properly formatted) mmCIF structures do not exhibit this issue.
    const singletons = [];
    for (const comp of compounds) {
        for (const chain of comp.chains) {
            singletons.push({
                description: comp.description,
                chains: [chain],
            });
        }
    }
    return singletons;
}

/**
 * Parses HETNAM records into a map from het identifier to chemical name.
 * Names of records sharing a het identifier are joined with a single space.
 *
 * @param {{ data: string, indices: ArrayLike<number> }} lines - Tokenized lines.
 * @param {number} lineStart - Index of the first HETNAM line (inclusive).
 * @param {number} lineEnd - Index after the last HETNAM line (exclusive).
 * @returns {Map<string, string>} Het identifier -> chemical name.
 */
export function parseHetnam(lines, lineStart, lineEnd) {
    const hetnams = new Map();

    for (let i = lineStart; i < lineEnd; i++) {
        const line = getTokenLine(lines, i);
        // COLUMNS       DATA TYPE       FIELD         DEFINITION
        // ----------------------------------------------------------------------------
        //  1 -  6       Record name     "HETNAM"
        //  9 - 10       Continuation    continuation  Allows concatenation of multiple records.
        // 12 - 14       LString(3)      hetID         Het identifier, right-justified.
        // 16 - 70       String          text          Chemical name.
        const het = line.substring(11, 14).trim();
        const name = line.substring(15).trim(); // support any length

        if (hetnams.has(het)) {
            hetnams.set(het, `${hetnams.get(het)} ${name}`);
        } else {
            hetnams.set(het, name);
        }
    }

    return hetnams;
}

/**
 * Parses SEQRES records into a map from chain identifier to residue names.
 *
 * @param {{ data: string, indices: ArrayLike<number> }} lines - Tokenized lines.
 * @param {number} lineStart - Index of the first SEQRES line (inclusive).
 * @param {number} lineEnd - Index after the last SEQRES line (exclusive).
 * @returns {Map<string, string[]>} Chain identifier -> residue names in record order.
 */
export function parseSeqres(lines, lineStart, lineEnd) {
    const seqresMap = new Map();

    for (let i = lineStart; i < lineEnd; i++) {
        const line = getTokenLine(lines, i);
        // COLUMNS        DATA TYPE      FIELD        DEFINITION
        // -------------------------------------------------------------------------------------
        //  1 -  6        Record name    "SEQRES"
        //  8 - 10        Integer        serNum       Serial number of the SEQRES record for the
        //                                            current chain. Starts at 1 and increments
        //                                            by one each line. Reset to 1 for each chain.
        // 12             Character      chainID      Chain identifier. This may be any single
        //                                            legal character, including a blank which
        //                                            is used if there is only one chain.
        // 14 - 17        Integer        numRes       Number of residues in the chain.
        //                                            This value is repeated on every record.
        // 20 - 22 ... 68 - 70
        //                Residue name   resName      Residue name (up to 13 per record).
        const chainId = line.substring(11, 12);
        // Support any number of residues per record. A record without residue
        // names would otherwise contribute a single empty string.
        const residues = line.substring(19).trim().split(/\s+/).filter((name) => name !== '');

        let chainResidues = seqresMap.get(chainId);
        if (chainResidues === undefined) {
            chainResidues = [];
            seqresMap.set(chainId, chainResidues);
        }
        chainResidues.push(...residues);
    }

    return seqresMap;
}