UNPKG

molstar

Version:

A comprehensive macromolecular library.

256 lines (255 loc) 10.9 kB
/**
 * Copyright (c) 2019-2024 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author David Sehnal <david.sehnal@gmail.com>
 * @author Alexander Rose <alexander.rose@weirdbyte.de>
 * @author Yana Rose <yana.v.rose@gmail.com>
 */
import { substringStartsWith } from '../../../mol-util/string';
import { CifCategory, CifField } from '../../../mol-io/reader/cif';
import { Tokenizer } from '../../../mol-io/reader/common/text/tokenizer';
import { parseCryst1, parseRemark350, parseMtrix } from './assembly';
import { parseHelix, parseSheet } from './secondary-structure';
import { parseCmpnd, parseHetnam } from './entity';
import { ComponentBuilder } from '../common/component';
import { EntityBuilder } from '../common/entity';
import { Column } from '../../../mol-data/db';
import { getMoleculeType } from '../../../mol-model/structure/model/types';
import { getAtomSiteTemplate, addAtom, getAtomSite, LabelAsymIdHelper } from './atom-site';
import { addAnisotropic, getAnisotropicTemplate, getAnisotropic } from './anisotropic';
import { parseConect } from './conect';
import { isDebugMode } from '../../../mol-util/debug';
import { addHeader } from './header';

/**
 * Finds the end of a contiguous run of lines that all start with `prefix`.
 *
 * The line at index `start` is assumed to already match `prefix` (the caller
 * has checked it). Scanning begins at `start + 1` and stops at the first line
 * that does not match or at `lines.count`, whichever comes first, so the
 * `indices` array is never read past the last line.
 *
 * @param lines Tokenized lines (`data`, `indices`, `count`).
 * @param start Index of the first line of the run.
 * @param prefix Record name every line of the run must start with.
 * @returns Exclusive end index of the run (first non-matching line index).
 */
function findRecordBlockEnd(lines, start, prefix) {
    const { data, indices, count } = lines;
    let end = start + 1;
    while (end < count && substringStartsWith(data, indices[2 * end], indices[2 * end + 1], prefix)) {
        end++;
    }
    return end;
}

/**
 * Converts tokenized PDB lines into a set of mmCIF-style categories.
 *
 * Reads `pdb.lines` (`data`, `indices`, `count`), `pdb.id` and `pdb.isPdbqt`.
 * The lines are traversed twice: first to count ATOM/HETATM and ANISOU records
 * so the column templates can be sized, then to fill the templates and collect
 * the helper categories produced by the individual record parsers.
 *
 * @param pdb Parsed PDB file object.
 * @returns Promise of `{ header, categoryNames, categories }`, where `header`
 *     is `pdb.id` or `'PDB'` when no id is set.
 */
export async function pdbToMmCif(pdb) {
    const { lines } = pdb;
    const { data, indices } = lines;
    const tokenizer = Tokenizer(data);
    const isPdbqt = !!pdb.isPdbqt;

    // Count the atoms
    let atomCount = 0;
    let anisotropicCount = 0;
    for (let i = 0, _i = lines.count; i < _i; i++) {
        const s = indices[2 * i];
        const e = indices[2 * i + 1];
        switch (data.charAt(s)) {
            case 'A':
                if (substringStartsWith(data, s, e, 'ATOM ')) atomCount++;
                else if (substringStartsWith(data, s, e, 'ANISOU')) anisotropicCount++;
                break;
            case 'H':
                if (substringStartsWith(data, s, e, 'HETATM')) atomCount++;
                break;
        }
    }

    const header = {};
    const atomSite = getAtomSiteTemplate(data, atomCount);
    const anisotropic = getAnisotropicTemplate(data, anisotropicCount);
    const entityBuilder = new EntityBuilder();
    const helperCategories = [];
    const heteroNames = [];

    let modelNum = 0, modelStr = '';
    let conectRange = undefined;
    let hasAssemblies = false;
    const terIndices = new Set();

    for (let i = 0, _i = lines.count; i < _i; i++) {
        const s = indices[2 * i];
        const e = indices[2 * i + 1];
        switch (data.charAt(s)) {
            case 'A':
                if (substringStartsWith(data, s, e, 'ATOM ')) {
                    // Atoms that appear before any MODEL record belong to model 1.
                    if (!modelNum) {
                        modelNum++;
                        modelStr = '' + modelNum;
                    }
                    addAtom(atomSite, modelStr, tokenizer, s, e, isPdbqt);
                } else if (substringStartsWith(data, s, e, 'ANISOU')) {
                    addAnisotropic(anisotropic, modelStr, tokenizer, s, e);
                }
                break;
            case 'C':
                if (substringStartsWith(data, s, e, 'CRYST1')) {
                    helperCategories.push(...parseCryst1(pdb.id || '?', data.substring(s, e)));
                } else if (substringStartsWith(data, s, e, 'CONECT')) {
                    const j = findRecordBlockEnd(lines, i, 'CONECT');
                    if (conectRange) {
                        if (isDebugMode) {
                            console.log('only single CONECT block allowed, ignoring others');
                        }
                    } else {
                        // Parsed later, once the final atom_site category is available.
                        conectRange = [i, j];
                    }
                    // Skip the consumed block; the loop's i++ moves to line j.
                    i = j - 1;
                } else if (substringStartsWith(data, s, e, 'COMPND')) {
                    const j = findRecordBlockEnd(lines, i, 'COMPND');
                    entityBuilder.setCompounds(parseCmpnd(lines, i, j));
                    i = j - 1;
                }
                break;
            case 'H':
                if (substringStartsWith(data, s, e, 'HEADER')) {
                    addHeader(data, s, e, header);
                } else if (substringStartsWith(data, s, e, 'HETATM')) {
                    if (!modelNum) {
                        modelNum++;
                        modelStr = '' + modelNum;
                    }
                    addAtom(atomSite, modelStr, tokenizer, s, e, isPdbqt);
                } else if (substringStartsWith(data, s, e, 'HELIX')) {
                    const j = findRecordBlockEnd(lines, i, 'HELIX');
                    helperCategories.push(parseHelix(lines, i, j));
                    i = j - 1;
                } else if (substringStartsWith(data, s, e, 'HETNAM')) {
                    const j = findRecordBlockEnd(lines, i, 'HETNAM');
                    heteroNames.push(...Array.from(parseHetnam(lines, i, j).entries()));
                    i = j - 1;
                }
                break;
            case 'M':
                if (substringStartsWith(data, s, e, 'MODEL ')) {
                    modelNum++;
                    modelStr = '' + modelNum;
                } else if (substringStartsWith(data, s, e, 'MTRIX')) {
                    const j = findRecordBlockEnd(lines, i, 'MTRIX');
                    helperCategories.push(...parseMtrix(lines, i, j));
                    i = j - 1;
                }
                // TODO: MODRES records => pdbx_struct_mod_residue
                break;
            case 'O':
                // TODO: ORIGX record => cif.database_PDB_matrix.origx, cif.database_PDB_matrix.origx_vector
                break;
            case 'R':
                if (substringStartsWith(data, s, e, 'REMARK 350')) {
                    const j = findRecordBlockEnd(lines, i, 'REMARK 350');
                    helperCategories.push(...parseRemark350(lines, i, j));
                    i = j - 1;
                    hasAssemblies = true;
                }
                break;
            case 'S':
                if (substringStartsWith(data, s, e, 'SHEET')) {
                    const j = findRecordBlockEnd(lines, i, 'SHEET');
                    helperCategories.push(parseSheet(lines, i, j));
                    i = j - 1;
                }
                // TODO: SCALE record => cif.atom_sites.fract_transf_matrix, cif.atom_sites.fract_transf_vector
                break;
            case 'T':
                if (substringStartsWith(data, s, e, 'TER')) {
                    // Remember the atom_site position at which the TER record occurred.
                    terIndices.add(atomSite.index);
                }
                break;
        }
    }

    // build entry, struct_keywords and pdbx_database_status
    if (header.id_code) {
        const entry = {
            id: CifField.ofString(header.id_code)
        };
        helperCategories.push(CifCategory.ofFields('entry', entry));
    }
    if (header.classification) {
        const struct_keywords = {
            pdbx_keywords: CifField.ofString(header.classification)
        };
        helperCategories.push(CifCategory.ofFields('struct_keywords', struct_keywords));
    }
    if (header.dep_date) {
        const pdbx_database_status = {
            recvd_initial_deposition_date: CifField.ofString(header.dep_date)
        };
        helperCategories.push(CifCategory.ofFields('pdbx_database_status', pdbx_database_status));
    }

    // build entity and chem_comp categories
    const seqIds = Column.ofIntTokens(atomSite.auth_seq_id);
    const atomIds = Column.ofStringTokens(atomSite.auth_atom_id);
    const compIds = Column.ofStringTokens(atomSite.auth_comp_id);
    const asymIds = Column.ofStringTokens(atomSite.auth_asym_id);
    const labelAsymIdHelper = new LabelAsymIdHelper(asymIds, atomSite.pdbx_PDB_model_num, terIndices, hasAssemblies);
    const componentBuilder = new ComponentBuilder(seqIds, atomIds);
    componentBuilder.setNames(heteroNames);
    entityBuilder.setNames(heteroNames);

    for (let i = 0, il = compIds.rowCount; i < il; ++i) {
        const compId = compIds.value(i);
        const moleculeType = getMoleculeType(componentBuilder.add(compId, i).type, compId);
        const asymId = labelAsymIdHelper.get(i);
        atomSite.label_entity_id[i] = entityBuilder.getEntityId(compId, moleculeType, asymId);
    }

    const atom_site = getAtomSite(atomSite, labelAsymIdHelper, { hasAssemblies });
    // The partial_charge column is only kept for PDBQT input.
    if (!isPdbqt) delete atom_site.partial_charge;

    if (conectRange) {
        helperCategories.push(parseConect(lines, conectRange[0], conectRange[1], atom_site));
    }

    const categories = {
        entity: CifCategory.ofTable('entity', entityBuilder.getEntityTable()),
        chem_comp: CifCategory.ofTable('chem_comp', componentBuilder.getChemCompTable()),
        atom_site: CifCategory.ofFields('atom_site', atom_site),
        atom_site_anisotrop: CifCategory.ofFields('atom_site_anisotrop', getAnisotropic(anisotropic))
    };

    // Helper categories are keyed by name; a later one replaces an earlier one of the same name.
    for (const c of helperCategories) {
        categories[c.name] = c;
    }

    return {
        header: pdb.id || 'PDB',
        categoryNames: Object.keys(categories),
        categories
    };
}