UNPKG

kekule

Version:

Open source JavaScript toolkit for chemoinformatics

740 lines (722 loc) 21.4 kB
/**
 * @fileoverview
 * Some common routines for both MOL/RXN 2000 and MOL/RXN 3000 files.
 * @author Partridge Jiang
 */

/*
 * requires /lan/classes.js
 * requires /core/kekule.common.js
 * requires /utils/kekule.utils.js
 * requires /data/kekule.dataUtils.js
 * requires /core/kekule.structures.js
 * requires /core/kekule.reactions.js
 * requires /localization
 */

/**
 * Enumeration of MDL MOL/RXN file versions.
 * @enum
 */
Kekule.IO.MdlVersion = {
	V2000: 2,
	V3000: 3
};

/**
 * Some constants about MDL.
 * @class
 */
Kekule.IO.MDL = {
	VER2000: 'V2000',
	VER3000: 'V3000',
	SYMBOL_ANYATOM: 'A',
	SYMBOL_STARATOM: '*',
	SYMBOL_HETEROATOM: 'Q',
	SYMBOL_RGROUP: 'R',
	SYMBOL_RGROUP2: 'R#',
	SYMBOL_LONEPAIR: 'LP',
	SYMBOL_ATOMLIST: 'L',
	SYMBOL_DUMMYATOM: 'Du',  // is this symbol legal in MDL?
	SD_DATA_HEAD_PREFIX: '>',
	MOL_DELIMITER: '$$$$'
};

/**
 * Util methods shared by the MDL 2000 and 3000 readers/writers.
 * @class
 */
Kekule.IO.MdlUtils = {
	/**
	 * Check if an atom symbol stands for an unspecified atom,
	 * i.e. it is the "any atom" or the "star atom" symbol.
	 * @param {String} value
	 * @returns {Bool}
	 */
	isUnspecifiedAtomSymbol: function(value)
	{
		var MDL = Kekule.IO.MDL;
		return (value === MDL.SYMBOL_ANYATOM) || (value === MDL.SYMBOL_STARATOM);
	},
	/**
	 * Check if an atom symbol stands for a hetero atom.
	 * @param {String} value
	 * @returns {Bool}
	 */
	isHeteroAtomSymbol: function(value)
	{
		return (value === Kekule.IO.MDL.SYMBOL_HETEROATOM);
	},
	/**
	 * Check if an atom symbol stands for a RGroup (either of the two RGroup symbols).
	 * @param {String} value
	 * @returns {Bool}
	 */
	isRGroupSymbol: function(value)
	{
		var MDL = Kekule.IO.MDL;
		return (value === MDL.SYMBOL_RGROUP) || (value === MDL.SYMBOL_RGROUP2);
	},
	/**
	 * Check if an atom symbol stands for a lone pair.
	 * @param {String} value
	 * @returns {Bool}
	 */
	isLonePairSymbol: function(value)
	{
		return (value === Kekule.IO.MDL.SYMBOL_LONEPAIR);
	},
	/**
	 * Check if an atom symbol stands for an atom list.
	 * @param {String} value
	 * @returns {Bool}
	 */
	isAtomListSymbol: function(value)
	{
		return (value === Kekule.IO.MDL.SYMBOL_ATOMLIST);
	},
	/**
	 * Analysis MDL date time string as MMDDYYHHmm or MMDDYYYYHHmm (long format).
	 * Fields that can not be parsed fall back to: January, day 1, year 0
	 * (which becomes 2000 in the short format), hour 0 and minute 0.
	 * Seconds and milliseconds of the result are always 0.
	 * @param {String} str
	 * @param {Bool} isLongFormat
	 * @returns {Date}
	 */
	analysisMdlDateTimeStr: function(str, isLongFormat)
	{
		// read one fixed-width numeric field, NaN when the field holds no number
		var readField = function(start, length)
		{
			return parseInt(str.substr(start, length).trim(), 10);
		};
		var yearLength = isLongFormat? 4: 2;
		var timeStart = 4 + yearLength;

		var month = (readField(0, 2) || 1) - 1;  // Date months are 0-based
		// default to day 1: a day of 0 would roll back to the last day of the previous month
		var day = readField(2, 2) || 1;
		var year = readField(4, yearLength) || 0;
		if (!isLongFormat)
		{
			if (year >= 70)  // assume 1970-1999
				year += 1900;
			else  // 2000-2069
				year += 2000;
		}
		var hour = readField(timeStart, 2) || 0;
		var minute = readField(timeStart + 2, 2) || 0;

		var date = new Date();
		date.setFullYear(year, month, day);
		// explicitly zero seconds/milliseconds, otherwise they keep the values of "now"
		date.setHours(hour, minute, 0, 0);
		return date;
	},
	/**
	 * Get a MDL date time string as MMDDYYHHmm or MMDDYYYYHHmm (long format).
	 * @param {Date} date
	 * @param {Bool} useLongFormat
	 * @returns {String}
	 */
	generateMdlDateTimeStr: function(date, useLongFormat)
	{
		// left pad a number with zeros to a fixed width
		var zeroPad = function(value, width)
		{
			return value.toString().lpad(width, '0');
		};
		var yearPart = useLongFormat?
			zeroPad(date.getFullYear(), 4):
			zeroPad(date.getFullYear() % 100, 2);
		// MMDDYYHHmm or MMDDYYYYHHmm
		return zeroPad(date.getMonth() + 1, 2)
			+ zeroPad(date.getDate(), 2)
			+ yearPart
			+ zeroPad(date.getHours(), 2)
			+ zeroPad(date.getMinutes(), 2);
	},
	/**
	 * Convert a MDL radical value to a Kekule one.
	 * Currently the value is returned unchanged.
	 * @param {Int} value
	 * @returns {Int}
	 */
	mdlRadicalToKekule: function(value)
	{
		return value;
	},
	/**
	 * Convert a Kekule radical value to a MDL one.
	 * Currently the value is returned unchanged.
	 * @param {Int} value
	 * @returns {Int}
	 */
	kekuleRadicalToMdl: function(value)
	{
		return value;
	},
	/**
	 * Convert a MDL bond type value to {@link Kekule.BondOrder} value.
	 * @param {Int} value
	 * @returns {Int} Value from {@link Kekule.BondOrder}.
	 */
	bondTypeToKekuleOrder: function(value)
	{
		switch (value)
		{
			case 1: return Kekule.BondOrder.SINGLE;
			case 2: return Kekule.BondOrder.DOUBLE;
			case 3: return Kekule.BondOrder.TRIPLE;
			case 4: return Kekule.BondOrder.EXPLICIT_AROMATIC;
			default:  // value 5-8 is used for SSS query, here all returns UNSET
				return Kekule.BondOrder.UNSET;
		}
	},
	/**
	 * Convert a Kekule bond order to MDL bond type.
	 * Orders without a dedicated MDL type are written as single bond (1).
	 * @param {Int} value Value from {@link Kekule.BondOrder}.
	 * @returns {Int}
	 */
	kekuleBondOrderToMdlType: function(value)
	{
		switch (value)
		{
			case Kekule.BondOrder.SINGLE: return 1;
			case Kekule.BondOrder.DOUBLE: return 2;
			case Kekule.BondOrder.TRIPLE: return 3;
			case Kekule.BondOrder.EXPLICIT_AROMATIC: return 4;
			default: return 1;
		}
	},
	/**
	 * Get version from first line of a RXN file.
	 * @param {String} line
	 * @returns {Int} Value from {@link Kekule.IO.MdlVersion}, or null if the line
	 *   does not start with the '$RXN' identifier.
	 */
	getRxnMarkVersion: function(line)
	{
		if (line.substr(0, 4) !== '$RXN')  // identifier wrong
			return null;
		// the version tag follows the identifier, anything but V3000 is regarded as V2000
		var versionTag = line.substr(4).trim();
		return (versionTag === Kekule.IO.MDL.VER3000)?
			Kekule.IO.MdlVersion.V3000:
			Kekule.IO.MdlVersion.V2000;
	},
	/**
	 * Check if a object can be output as MDL MOL/CTAB format.
	 * Kekule.error is called with a localized message when the object is not suitable.
	 * @param {Variant} obj Generally a {@link Kekule.StructureFragment} with Ctab can be output well.
	 * @returns {Bool}
	 */
	assertIlegalForCtabOutput: function(obj)
	{
		// assert obj is a Kekule.StructureFragment and has Ctab
		if (!(obj instanceof Kekule.StructureFragment))
		{
			Kekule.error(/*Kekule.ErrorMsg.CAN_NOT_WRITE_NON_MOLECULE_TO_MOL*/Kekule.$L('ErrorMsg.CAN_NOT_WRITE_NON_MOLECULE_TO_MOL'));
			return false;
		}
		if (!obj.hasCtab())  // no ctab, can not output either
		{
			Kekule.error(/*Kekule.ErrorMsg.MOLECULE_HAS_NO_CTAB_TO_OUTPUT*/Kekule.$L('ErrorMsg.MOLECULE_HAS_NO_CTAB_TO_OUTPUT'));
			return false;
		}
		return true;
	}
};

/**
 * Utils to create Kekule structures by JSON info read from MDL data.
 * @class
 * @private
 */
Kekule.IO.MdlStructureUtils = {
	/**
	 * Create CTab of fragment by ctabInfo JSON data.
	 * The fragment is cleared first, then filled with atoms, bonds and
	 * superatom ('SUP') sgroups described by ctabInfo.
	 * @param {Kekule.StructureFragment} fragment
	 * @param {Hash} ctabInfo May contain fields atomInfos, bondInfos and sgInfos.
	 * @param {Int} coordMode Value from {@link Kekule.CoordMode}, force coordinate in which mode.
	 * @returns {Kekule.StructureFragment} The fragment passed in.
	 * @private
	 */
	fillFragment: function(fragment, ctabInfo, coordMode)
	{
		var SU = Kekule.IO.MdlStructureUtils;
		var i, l, info;
		fragment.clear();
		if (!ctabInfo)
			return fragment;

		// decide coordinate mode: an explicit coordMode wins over the flag stored in atomInfos
		var isCoord3D;
		if (coordMode && (coordMode != Kekule.CoordMode.UNKNOWN))
			isCoord3D = coordMode == Kekule.CoordMode.COORD3D;
		else  // atomInfos may be absent, do not dereference it blindly
			isCoord3D = ctabInfo.atomInfos? ctabInfo.atomInfos.isCoord3D: false;

		// atoms
		if (ctabInfo.atomInfos)
		{
			for (i = 0, l = ctabInfo.atomInfos.length; i < l; ++i)
			{
				info = ctabInfo.atomInfos[i];
				if (!info)
					continue;
				var atom = SU.createStructureNode(fragment, info, isCoord3D);
				if (atom)
					fragment.appendNode(atom);
				else
					Kekule.raise(/*Kekule.ErrorMsg.MDL_CTAB_ATOM_CANNOT_CREATE*/Kekule.$L('ErrorMsg.MDL_CTAB_ATOM_CANNOT_CREATE'));
			}
		}

		// bonds
		if (ctabInfo.bondInfos)
		{
			for (i = 0, l = ctabInfo.bondInfos.length; i < l; ++i)
			{
				info = ctabInfo.bondInfos[i];
				if (!info)
					continue;
				var bond = SU.createStructureConnector(fragment, info);
				if (bond)
					fragment.appendConnector(bond);
				else
					Kekule.raise(/*Kekule.ErrorMsg.MDL_CTAB_BOND_CANNOT_CREATE*/Kekule.$L('ErrorMsg.MDL_CTAB_BOND_CANNOT_CREATE'));
			}
		}

		// sgroup
		if (ctabInfo.sgInfos && ctabInfo.sgInfos.length)
		{
			// First resolve the atoms of all groups by index, marshal afterwards.
			// NOTE(review): presumably marshalling changes node indexes of fragment,
			// so the two passes should not be merged - confirm before refactoring.
			var subGroupInfo = [];
			for (i = 0, l = ctabInfo.sgInfos.length; i < l; ++i)
			{
				info = ctabInfo.sgInfos[i];
				if (info && (info.sgType == 'SUP'))  // superAtom, subgroup
				{
					var groupAtoms = [];
					for (var j = 0, k = info.atomIndexes.length; j < k; ++j)
						groupAtoms.push(fragment.getNodeAt(info.atomIndexes[j]));
					if (groupAtoms.length)
						subGroupInfo.push({'atoms': groupAtoms, 'text': info.label});
				}
			}
			for (i = 0, l = subGroupInfo.length; i < l; ++i)
			{
				var atoms = subGroupInfo[i].atoms;
				if (!atoms.length)
					continue;
				var subGroup = fragment.marshalSubFragment(atoms, new Kekule.SubGroup());
				var groupText = subGroupInfo[i].text;
				// text has number at middle, may be a formula; a missing label is never a formula
				if (subGroup.setFormulaText && groupText && groupText.match(/.+\d/))
					subGroup.setFormulaText(groupText);
				else
					subGroup.setAbbr(groupText);
			}
		}
		return fragment;
	},
	/**
	 * Create {@link Kekule.Atom} or other structure node by atomInfo read from MDL data.
	 * The node type is chosen from atomInfo.atomListInfo / atomInfo.symbol, then charge,
	 * radical, parity, explicit hydrogen count and coordinates are applied.
	 * @param {Kekule.StructureFragment} fragment Not used by this method.
	 * @param {Object} atomInfo
	 * @param {Bool} isCoord3D If true, 3D coordinate of node is set, otherwise 2D one.
	 * @returns {Kekule.ChemStructureNode}
	 * @private
	 */
	createStructureNode: function(fragment, atomInfo, isCoord3D)
	{
		var MU = Kekule.IO.MdlUtils;
		var result;
		// create suitable node first
		if (atomInfo.atomListInfo || MU.isAtomListSymbol(atomInfo.symbol))  // an atom list
		{
			result = new Kekule.VariableAtom();
			// fill isotope list
			var listInfo = atomInfo.atomListInfo;
			if (listInfo && listInfo.symbols)
			{
				var list = [].concat(listInfo.symbols);
				if (listInfo.isAllowList)
					result.setAllowedIsotopeIds(list);
				else
					result.setDisallowedIsotopeIds(list);
			}
		}
		else if (MU.isRGroupSymbol(atomInfo.symbol))
		{
			result = new Kekule.RGroup();
		}
		else if (MU.isHeteroAtomSymbol(atomInfo.symbol))
		{
			result = new Kekule.Pseudoatom(null, Kekule.PseudoatomType.HETERO);
		}
		else if (MU.isUnspecifiedAtomSymbol(atomInfo.symbol))
		{
			result = new Kekule.Pseudoatom(null, Kekule.PseudoatomType.ANY);
		}
		else  // normal atom?
		{
			var elemInfo = Kekule.ChemicalElementsDataUtil.getElementInfo(atomInfo.symbol);
			if (elemInfo)
			{
				// get massNumber
				var massNumber = null;
				if (atomInfo.massNumber)
					massNumber = atomInfo.massNumber;
				else if (atomInfo.massDiff)  // need get normal mass number and calculate
				{
					// try rounded, floored then ceiled mass, the first one that is a known isotope wins
					var rawMass = elemInfo.naturalMass + atomInfo.massDiff;
					var candidates = [Math.round(rawMass), Math.floor(rawMass), Math.ceil(rawMass)];
					for (var i = 0, l = candidates.length; i < l; ++i)
					{
						if (Kekule.IsotopesDataUtil.getIsotopeInfo(elemInfo.atomicNumber, candidates[i]))
						{
							massNumber = candidates[i];
							break;
						}
					}
					// if none matches, massNumber stays null and isotope is not considered
				}
				result = new Kekule.Atom(null, atomInfo.symbol, massNumber);
			}
			else  // may be an isotope alias
			{
				var isoInfo = Kekule.IsotopesDataUtil.getIsotopeInfo(atomInfo.symbol);
				if (isoInfo)
					result = new Kekule.Atom(null, isoInfo.atomicNumber, isoInfo.massNumber);
				else  // has no element info, create a pseudo atom
					result = new Kekule.Pseudoatom(null, Kekule.PseudoatomType.ANY);
			}
		}

		// then node detail, coordinate, hydrogen count and charge
		if (atomInfo.charge)
			result.setCharge(atomInfo.charge);
		if (atomInfo.radical)
			result.setRadical(atomInfo.radical);
		if (atomInfo.parity && result.setParity)
			result.setParity(atomInfo.parity);
		// 'hydrongenCount' (sic) is the field name actually read from atomInfo, do not "fix" the spelling here
		if (typeof(atomInfo.hydrongenCount) !== 'undefined')
		{
			if (result.setExplicitHydrogenCount)
				result.setExplicitHydrogenCount(atomInfo.hydrongenCount);
		}
		if (isCoord3D)
			result.setCoord3D({'x': atomInfo.x, 'y': atomInfo.y, 'z': atomInfo.z});
		else
			result.setCoord2D({'x': atomInfo.x, 'y': atomInfo.y});
		return result;
	},
	/**
	 * Create {@link Kekule.Bond} or other structure connector by bondInfo read from MDL data.
	 * Note: atoms in fragment should be created before creating bonds.
	 * @param {Kekule.StructureFragment} fragment
	 * @param {Object} bondInfo
	 * @returns {Kekule.ChemStructureConnector} Null if either of the two primary atoms can not be found.
	 * @private
	 */
	createStructureConnector: function(fragment, bondInfo)
	{
		// find two atoms
		var atom1 = fragment.getNodeAt(bondInfo.atomIndex1);
		var atom2 = fragment.getNodeAt(bondInfo.atomIndex2);
		if (!atom1 || !atom2)  // atom not exists, return null
			return null;

		var objs = [atom1, atom2];
		// check if there is multiple endpoint in MDL 3k
		var endIndexes = bondInfo.endAtomIndexes;
		if (endIndexes)
		{
			for (var i = 0, l = endIndexes.length; i < l; ++i)
			{
				var endAtom = fragment.getNodeAt(endIndexes[i]);
				if (endAtom)
					objs.push(endAtom);
			}
		}
		var result = new Kekule.Bond(null, objs, bondInfo.order);
		if (typeof(bondInfo.stereo) !== 'undefined')
			result.setStereo(bondInfo.stereo);
		return result;
	},
	/**
	 * Check if a node is {@link Kekule.VariableAtom} which should be translate to MDL atom list.
	 * @param {Object} node
	 * @returns {Bool}
	 * @private
	 */
	isNodeVariableAtom: function(node)
	{
		return node instanceof Kekule.VariableAtom;
	},
	/**
	 * Return atom's type string for MDL. Usually an atom's element symbol.
	 * @param {Kekule.ChemStructureNode} node
	 * @param {Bool} is2k If true, a variable atom is output as the plain atom list symbol,
	 *   otherwise a MDL 3000 style '[...]' / 'NOT[...]' list string is generated.
	 * @returns {String} '?' is returned for node of unknown class.
	 * @private
	 */
	getAtomTypeStr: function(node, is2k)
	{
		var MDL = Kekule.IO.MDL;
		if (node instanceof Kekule.Atom)
			return node.getSymbol();
		if (node instanceof Kekule.Pseudoatom)
		{
			switch (node.getAtomType())
			{
				case Kekule.PseudoatomType.ANY: return MDL.SYMBOL_ANYATOM;
				case Kekule.PseudoatomType.HETERO: return MDL.SYMBOL_HETEROATOM;
				default: return MDL.SYMBOL_DUMMYATOM;
			}
		}
		if (node instanceof Kekule.RGroup)
			return MDL.SYMBOL_RGROUP;
		if (node instanceof Kekule.VariableAtom)  // atom list, here returns str for MDL 3000
		{
			if (is2k)
				return MDL.SYMBOL_ATOMLIST;
			var prefix = '[';
			var ids = node.getAllowedIsotopeIds();
			if (!(ids && ids.length))  // no allow list, fall back to disallow list
			{
				prefix = 'NOT[';
				ids = node.getDisallowedIsotopeIds();
			}
			if (!(ids && ids.length))  // no explicit ids
				return MDL.SYMBOL_ATOMLIST;
			var symbols = [];
			for (var i = 0, l = ids.length; i < l; ++i)
			{
				var detail = Kekule.IsotopesDataUtil.getIsotopeIdDetail(ids[i]);
				if (detail && detail.symbol)
					symbols.push(detail.symbol);
			}
			return prefix + symbols.join(',') + ']';
		}
		// do not know what atom
		return '?';
	},
	/**
	 * Get basic molecule info such as atoms, bonds, subgroups for Ctab writer.
	 * coordMode is COORD2D only when more atoms have 2D coordinates than 3D ones,
	 * otherwise (including a tie) COORD3D.
	 * @param {Kekule.StructureFragment} mol
	 * @returns {Hash} {atoms, bonds, subGroups, coordMode}
	 * @private
	 */
	getMoleculeCtabStructureInfo: function(mol)
	{
		var result = {
			'atoms': mol.getLeafNodes(),
			'bonds': mol.getAllChildConnectors(),
			'subGroups': mol.getSubFragments()
		};
		var count2d = 0, count3d = 0;
		for (var i = 0, l = result.atoms.length; i < l; ++i)
		{
			var atom = result.atoms[i];
			if (atom.hasCoord3D())
				++count3d;
			if (atom.hasCoord2D())
				++count2d;
		}
		result.coordMode = (count2d > count3d)? Kekule.CoordMode.COORD2D: Kekule.CoordMode.COORD3D;
		return result;
	},
	/**
	 * Split node connected with connector to 2 groups: 2 primaryNodes and other remainNodes
	 * @param {Kekule.ChemStructureConnector} connector
	 * @returns {Hash} {primaryNodes, remainNodes}. primaryNodes holds at most the first two
	 *   connected nodes, field remainNodes only exists when there are more than two nodes.
	 * @private
	 */
	splitConnectedNodes: function(connector)
	{
		var result = {'primaryNodes': []};
		var objs = connector.getConnectedObjs();
		for (var i = 0, l = objs.length; i < l; ++i)
		{
			var obj = objs[i];
			if (!(obj instanceof Kekule.ChemStructureNode))  // bond-bond connection is not supported by MDL
				continue;
			if (result.primaryNodes.length < 2)  // add to node1/2
				result.primaryNodes.push(obj);
			else  // remainNodes
			{
				if (!result.remainNodes)
					result.remainNodes = [];
				result.remainNodes.push(obj);
			}
		}
		return result;
	},
	/**
	 * Generate 2k or 3k compatibility count line string of molecule.
	 * Both forms follow the fixed-width layout aaabbblllfffcccsssxxxrrrpppiiimmmvvvvvv.
	 * @param {Hash} molInfo Info returned by {@link Kekule.IO.MdlStructureUtils.getMoleculeCtabStructureInfo}.
	 * @param {Int} mdlVersion Value from {@link Kekule.IO.MdlVersion}.
	 * @returns {String}
	 * @private
	 */
	generateClassicStyleCountLine: function(molInfo, mdlVersion)
	{
		// each count field occupies 3 columns, right aligned
		var field = function(value)
		{
			return value.toString().lpad(3);
		};
		var i;
		var s = '';
		if (mdlVersion == Kekule.IO.MdlVersion.V3000)
		{
			// 3k compatibility line: six zeroed count fields (aaa-sss), the 15 columns of
			// xxxrrrpppiiimmm left blank, so that the version tag sits in columns vvvvvv
			// just like in the 2k line below.
			for (i = 0; i < 6; ++i)
				s += field(0);
			s += new Array(15 + 1).join(' ');
			s += Kekule.IO.MDL.VER3000.lpad(6);
			return s;
		}
		// 2k count line
		// aaa: number of atoms
		s += field(molInfo.atoms.length);
		// bbb: number of bonds
		s += field(molInfo.bonds.length);
		// lll: atomList count, used for query, currently bypass
		s += field(0);
		// fff: obsolete
		s += field(0);
		// ccc: chiral flag: 0=not chiral, 1=chiral
		// TODO: currently chiral is not considered
		s += field(0);
		// sss: number of stext entries (for ISIS/Desktop)
		s += field(0);
		// xxxrrrpppiii: obsolete
		for (i = 0; i < 4; ++i)
			s += field(0);
		// mmm: No longer supported, the default is set to 999, ignore here
		s += '999';
		// vvvvvv, version tag
		s += Kekule.IO.MDL.VER2000.lpad(6);
		return s;
	}
};

/**
 * A handler for MDL 2000 or 3000 blocks. Base class for block reader and writers.
 * @class
 * @augments ObjectEx
 */
Kekule.IO.MdlBlockHandler = Class.create(ObjectEx,
/** @lends Kekule.IO.MdlBlockHandler# */
{
	/** @private */
	CLASS_NAME: 'Kekule.IO.MdlBlockHandler',
	/** @private */
	initProperties: function()
	{
		// private property, read only, the buffer is created lazily on first access
		this.defineProp('textBuffer', {
			'dataType': 'Kekule.TextLinesBuffer',
			'serializable': false,
			'getter': function()
			{
				var r = this.getPropStoreFieldValue('textBuffer');
				if (!r)
				{
					r = this.createTextBuffer();
					this.setPropStoreFieldValue('textBuffer', r);
				}
				return r;
			},
			'setter': null
		});
	},
	/**
	 * V2000 and V3000 reader has different text formats, so different text buffer class is needed.
	 * Descendants may override this method to supply their own buffer.
	 * @returns {Kekule.TextLinesBuffer}
	 * @private
	 */
	createTextBuffer: function()
	{
		return new Kekule.TextLinesBuffer();
	}
});

/**
 * Base class of readers to read different MDL blocks (2000 or 3000).
 * @class
 * @augments Kekule.IO.MdlBlockHandler
 */
Kekule.IO.MdlBlockReader = Class.create(Kekule.IO.MdlBlockHandler,
/** @lends Kekule.IO.MdlBlockReader# */
{
	/** @private */
	CLASS_NAME: 'Kekule.IO.MdlBlockReader',
	/**
	 * Read a text block and return a suitable object.
	 * @param {Variant} textOrLines String or array of string.
	 * @param {Kekule.ChemObject} parentObj Passed through to {@link Kekule.IO.MdlBlockReader#doReadBlock}.
	 * @returns {Variant}
	 */
	readBlock: function(textOrLines, parentObj)
	{
		var buffer = this.getTextBuffer();
		if (typeof(textOrLines) === 'string')
			buffer.setText(textOrLines);
		else
			buffer.setLines(textOrLines);
		buffer.reset();
		// forward parentObj too, doReadBlock is declared to receive it
		return this.doReadBlock(buffer, parentObj);
	},
	/**
	 * Do the actual work of {@link Kekule.IO.MdlBlockReader#readBlock}.
	 * Read content in textBuffer and create suitable object. Descendants should override this method.
	 * @param {Kekule.TextLinesBuffer} textBuffer
	 * @param {Kekule.ChemObject} parentObj
	 * @private
	 */
	doReadBlock: function(textBuffer, parentObj)
	{
		// do nothing here.
	}
});

/**
 * Base class of writers to write different MDL blocks (2000 or 3000).
 * @class
 * @augments Kekule.IO.MdlBlockHandler
 */
Kekule.IO.MdlBlockWriter = Class.create(Kekule.IO.MdlBlockHandler,
/** @lends Kekule.IO.MdlBlockWriter# */
{
	/** @private */
	CLASS_NAME: 'Kekule.IO.MdlBlockWriter',
	/**
	 * Write a text block for a suitable object.
	 * @param {Variant} obj Kekule object to write.
	 * @returns {String} Text written.
	 */
	writeBlock: function(obj)
	{
		var buffer = this.getTextBuffer();
		buffer.clear();
		this.doWriteBlock(obj, buffer);
		return buffer.getText();
	},
	/**
	 * Do the actual work of {@link Kekule.IO.MdlBlockWriter#writeBlock}. Write text to textBuffer.
	 * Descendants should override this method.
	 * @param {Variant} obj Kekule object to write.
	 * @param {Kekule.TextLinesBuffer} textBuffer
	 * @private
	 */
	doWriteBlock: function(obj, textBuffer)
	{
		// do nothing here.
	}
});