UNPKG

kekule

Version:

Open source JavaScript toolkit for chemoinformatics

1,199 lines (1,178 loc) 37.5 kB
/** * @fileoverview * File for supporting MDL CTAB/MOL/RXN V2000 data. * @author Partridge Jiang */ /* * requires /lan/classes.js * requires /core/kekule.common.js * requires /core/kekule.elements.js * requires /core/kekule.electrons.js * requires /core/kekule.structures.js * requires /core/kekule.reactions.js * requires /utils/kekule.textHelper.js * requires /io/kekule.io.js * requires /io/kekule.io.mdlBase.js * requires /localization */ /** * Util methods for MDL 2000 format. * @class * @private */ Kekule.IO.Mdl2kUtils = { /** * Turn charge value in CTAB atom line to normal value * @param {Int} value * @returns {Int} * @private */ atomLineChargeToKekule: function(value) { switch (value) { case 1: return 3; break; case 2: return 2; break; case 3: return 1; break; case 5: return -1; break; case 6: return -2; break; case 7: return -3; break; default: return 0; //0 } }, /** * Turn normal charge value to value used in CTAB atom line. * @param {Int} value * @returns {Int} * @private */ chargeToMdlAtomLineValue: function(value) { switch (value) { case 1: return 3; break; case 2: return 2; break; case 3: return 1; break; case -1: return 5; break; case -2: return 6; break; case -3: return 7; break; default: return 0; //0 } }, /** * If the charge value in atom line is 4, it does not mean a charge but a doublet radical. * @param {Int} value * @returns {Int} * @private */ atomLineChargeToRadical: function(value) { if (value == 4) return Kekule.RadicalOrder.DOUBLET; // doublet radical else return 0; }, /** * If the radical value 2(doublet), atom line charge value should be 4 * @param {Int} value * @returns {Int} * @private */ radicalToMdlAtomLineValue: function(value) { return (value == Kekule.RadicalOrder.DOUBLET)? 4: 0; }, /** * Convert charge and radical to a suitable CHG value in MDL 2k atom line. 
* @param {Int} charge * @param {Int} radical * @private */ chargeOrRadicalToMdlAtomLineValue: function(charge, radical) { if (charge) return Kekule.IO.Mdl2kUtils.chargeToMdlAtomLineValue(charge); else if (radical) return Kekule.IO.Mdl2kUtils.radicalToMdlAtomLineValue(charge); else return 0; }, /** * Convert a MDL bond stereo value to {@link Kekule.BondStereo} value. * @param {Int} value * @param {Int} bondOrder Note this value is a Kekule order, not a MDL one. * @returns {Int} Value from {@link Kekule.BondStereo} * @private */ bondStereoToKekule: function(value, bondOrder) { if (bondOrder == Kekule.BondOrder.SINGLE) { switch (value) { case 1: return Kekule.BondStereo.UP; break; case 4: return Kekule.BondStereo.UP_OR_DOWN; break; // either case 6: return Kekule.BondStereo.DOWN; break; default: // 0 return Kekule.BondStereo.NONE; } } else if (bondOrder == Kekule.BondOrder.DOUBLE) { switch (value) { case 3: return Kekule.BondStereo.E_Z_BY_COORDINATES; break; // cis or trans by coord default: // 0 return Kekule.BondStereo.NONE; } } else return Kekule.BondStereo.NONE; }, /** * Convert a {@link Kekule.BondStereo} value to MDL bond stereo value in bond line. * @param {Int} value Value from {@link Kekule.BondStereo}. * @param {Int} bondOrder Note this value is a Kekule order, not a MDL one. 
* @returns {Hash} * @private */ bondStereoToMdlBondLineValue: function(value, bondOrder, allowInverted) { var BS = Kekule.BondStereo; var result = {}; if (bondOrder == Kekule.BondOrder.SINGLE) { if (allowInverted && [BS.UP_INVERTED, BS.DOWN_INVERTED].indexOf(value) >= 0) result.inverted = true; if (result.inverted) { if (value === BS.UP_INVERTED) result.stereo = 1; else if (value === BS.DOWN_INVERTED) result.stereo = 6; } else { switch (value) { case BS.DOWN_INVERTED: case BS.UP: result.stereo = 1; break; case BS.UP_INVERTED: case BS.DOWN: result.stereo = 6; break; case BS.UP_OR_DOWN_INVERTED: case BS.UP_OR_DOWN: result.stereo = 4; break; default: // Kekule.BondStereo.NONE result.stereo = 0; break; } } } else if (bondOrder == Kekule.BondOrder.DOUBLE) { switch (value) { case Kekule.BondStereo.E_Z_BY_COORDINATES: result.stereo = 3; // cis or trans by coord break; default: // Kekule.BondStereo.NONE result.stereo = 0; } } else result.stereo = 0; return result; }, /** * Turn a float coordinate value to a MDL V2000 coordinate string. * @param {Float} value * @returns {String} * @private */ coordToStr: function(value) { return (value || 0).toFixed(4); }, /** * Get isotope's mass difference used in CTAB 2000 atom block. * @param {Object} isotope * @private */ getMassDiff: function(isotope) { var elemInfo = Kekule.ChemicalElementsDataUtil.getElementInfo(isotope.getAtomicNumber()); var naturalMass = elemInfo.naturalMass; return Math.round(isotope.getMassNumber() - naturalMass); }, /** * If node is a {@link Kekule.VariableAtom}, property line need to be add to CTAB. * This method generate such a line. * @param {Kekule.VariableAtom} node * @private */ getAtomListPropLineValue: function(node) { // M ALS aaannn e 11112222333344445555... // should generate nnn e 11112222333344445555... 
part var s = ''; var ids = node.getAllowedIsotopeIds(); if (ids && ids.length) // allow list s = ' F '; else // disallow list { s = ' T '; ids = node.getDisallowedIsotopeIds(); } if (ids && ids.length) { var symbols = []; for (var i = 0, l = ids.length; i < l; ++i) { var detail = Kekule.IsotopesDataUtil.getIsotopeIdDetail(ids[i]); if (detail && detail.symbol) symbols.push(detail.symbol.rpad(4)); } var sSymbols = symbols.join(''); s = symbols.length.toString().lpad(3) + s + sSymbols; return s; } else // no explicit atoms return null; }, /** * Generate leading tag for prop line. Usually 'M XXX' * @param {String} propName * @param {String} prefix * @private */ getCtabPropLineTag: function(propName, prefix) { return (prefix || 'M') + ' ' + propName; }, /** * Generate leading tag with entry count for prop line. Usually 'M XXXnnn' * @param {String} propName * @param {Int} entryCount * @param {String} prefix * @private */ getCtabPropLineCountTag: function(propName, entryCount, prefix) { var r = (prefix || 'M') + ' ' + propName; if (entryCount) r += (entryCount).toString().lpad(3); return r; }, /** * Generate property line string with values. * @param {Array} values * @param {String} propName * @param {Int} entryCount * @param {String} prefix * @private */ generateCtabPropLine: function(values, propName, entryCount, prefix) { var tag = Kekule.IO.Mdl2kUtils.getCtabPropLineCountTag(propName, entryCount, prefix); return tag + ' ' + values.join(' '); } } /** * Class to read and anaylsis MDL 2000 Connection Table block. 
* @class * @augments Kekule.IO.MdlBlockReader * @private */ Kekule.IO.Mdl2kCTabReader = Class.create(Kekule.IO.MdlBlockReader, /** @lends Kekule.IO.Mdl2kCTabReader# */ { /** @private */ CLASS_NAME: 'Kekule.IO.Mdl2kCTabReader', /** @private */ doReadBlock: function(textBuffer, parentObj) { return this.analysisCTab(textBuffer); }, /** * Analysis the whole ctab * @param {Kekule.TextLinesBuffer} textBuffer * @private */ analysisCTab: function(textBuffer) { var result = {}; // count line var line = textBuffer.readLine(); var countInfo = this.analysisCountLine(line); // anaylis table according to countInfo, firstly atom lines var atomInfos = []; atomInfos.isCoord3D = false; for (var i = 0; i < countInfo.atomCount; ++i) { line = textBuffer.readLine(); var atomInfo = this.analysisAtomLine(line); if (atomInfo.z) // has z coord, 3D mode atomInfos.isCoord3D = true; atomInfos.push(atomInfo); } // then bond lines var bondInfos = []; for (var i = 0; i < countInfo.bondCount; ++i) { line = textBuffer.readLine(); var bondInfo = this.analysisBondLine(line); bondInfos.push(bondInfo); } // then atom list lines for (var i = 0; i < countInfo.atomListCount; ++i) { line = textBuffer.readLine(); var atomListInfo = this.analysisAtomListInfoLine(line); // attach list info to atom if (atomInfos[atomListInfo.atomIndex]) { atomInfos[atomListInfo.atomIndex].atomListInfo = atomListInfo; } } // then stext lines, ignore currently. // each stext has two lines: // xxxxx.xxxxyyyyy.yyyy - the coordinate // TTTT... 
- the text for (var i = 0; i < countInfo.stextCount; ++i) { line = textBuffer.readLine(); line = textBuffer.readLine(); } // all left is properties lines till "M END" var sgroupInfos = []; while (!textBuffer.eof()) { line = textBuffer.readLine(); var info = this.analysisPropertyLine(line, atomInfos, sgroupInfos); if (info.prop === 'END') // end of connection table, break; break; if (info.prop === 'SKP') // skip lines { for (var i = 0; i < info.count; ++i) { if (textBuffer.eof()) break; line = textBuffer.readLine(); } } else if (info.leading === 'A') // atom alias { if (textBuffer.eof()) break; var atomIndex = info.atomIndex; line = textBuffer.readLine(); //console.log(atomInfos, atomIndex, atomInfos[atomIndex]); atomInfos[atomIndex].symbol = line.trim(); } } result.countInfo = countInfo; result.atomInfos = atomInfos; result.bondInfos = bondInfos; result.sgInfos = sgroupInfos; return result; }, /** * Read count line of CTAB. * @private */ analysisCountLine: function(line) { var result = {}; // line format: aaabbblllfffcccsssxxxrrrpppiiimmmvvvvvv // vvvvvv: version flag, check if it is V2000 var s = line.substr(33, 6).trim(); if (s && s != Kekule.IO.MDL.VER2000) // wrong version flag, it may be not a CTAB 2000, raise exception // Note: sometime Version mark may be missing (s is empty), assume V2000 { Kekule.error(/*Kekule.ErrorMsg.NOT_MDL2000_FORMAT_DATA*/Kekule.$L('ErrorMsg.NOT_MDL2000_FORMAT_DATA')); return null; } // aaa: number of atoms s = line.substr(0, 3); result.atomCount = parseInt(s, 10); // bbb: number of bonds s = line.substr(3, 3); result.bondCount = parseInt(s, 10); // lll: atomList count, used for query, currently bypass // fff: obsolete // ccc: chiral flag: 0=not chiral, 1=chiral s = line.substr(12, 3); result.isChiral = (parseInt(s, 10) != 0); // sss: number of stext entries (for ISIS/Desktop) s = line.substr(15, 3); result.stextCount = parseInt(s, 10); // xxxrrrpppiii: obsolete // mmm: No longer supported, the default is set to 999, ignore here 
return result; }, /** * Analysis a line of atom block in CTAB. * @return {Hash} Info about this atom. * @private */ analysisAtomLine: function(line) { var result = {}; // line format: xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz aaaddcccssshhhbbbvvvHHHrrriiimmmnnneee // xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz: x,y,z-coord var s = line.substr(0, 10); result.x = parseFloat(s); s = line.substr(10, 10); result.y = parseFloat(s); s = line.substr(20, 10); result.z = parseFloat(s); // aaa: atom symbol s = line.substr(31, 3); result.symbol = s.trim(); // dd: mass difference s = line.substr(34, 2); result.massDiff = parseInt(s, 10); // ccc: charge s = line.substr(36, 3); var i = parseInt(s, 10); var rad = Kekule.IO.Mdl2kUtils.atomLineChargeToRadical(i); if (rad) // ccc is 4, means doublet radical result.radical = rad; else // ccc is charge result.charge = Kekule.IO.Mdl2kUtils.atomLineChargeToKekule(i); // sss: atom stereo parity s = line.substr(39, 3); var i = parseInt(s, 10); if (i) result.parity = i; //s = line.substr(39, 3); // hhh: hydrogen count + 1, used for query. Need to be handled here???? s = line.substr(42, 3); var i = parseInt(s, 10); if (i > 0) result.hydrongenCount = i - 1; // bbb: stereo care box, used for query, ignore // vvv: // TODO: valence, ignore here // HHH: H0 designator, 0 = not specified, 1 = no H atoms allowed, ignored // rrriii: not used // mmm: atom-atom mapping number, used for reaction, ignored currently // nnn: inversion/retention flag, used for reaction, ignored currently // eee: exact change flag, 0 = property not applied, 1 = change on atom must be exactly as shown // used for reaction or query, ignored currrenly return result; }, /** * Analysis a line of bond block in CTAB. * @return {Hash} Info about this bond. 
* @private */ analysisBondLine: function(line) { var result = {}; // line format: 111222tttsssxxxrrrccc // 111222: first and second atom number var s = line.substr(0, 3); result.atomIndex1 = parseInt(s, 10) - 1; s = line.substr(3, 3); result.atomIndex2 = parseInt(s, 10) - 1; //ttt: bond type s = line.substr(6, 3); result.order = Kekule.IO.MdlUtils.bondTypeToKekuleOrder(parseInt(s, 10)); //sss: bond stereo s = line.substr(9, 3); var stereo = Kekule.IO.Mdl2kUtils.bondStereoToKekule(parseInt(s, 10), result.order); if (stereo) result.stereo = stereo; // xxx: not used // rrr: bond topology, 0 = Either, 1 = Ring, 2 = Chain, SSS queries only, ignored // ccc: reacting center status, for reaction and query, ignored return result; }, /** * Analysis a line of atom list block in CTAB. * @return {Hash} Info about this atom list. * @private */ analysisAtomListInfoLine: function(line) { var result = {}; // line format: aaa kSSSSn 111 222 333 444 555 // aaa: number of atom (L) where list is attached var s = line.substr(0, 3); result.atomIndex = parseInt(s, 10) - 1; // k: = T = [NOT] list, = F = normal list s = line.substr(4, 1); result.isAllowList = (s.toLowerCase() == 'f'); // SSSS: space // n: number of entries in list, max is 5 s = line.substr(9, 1); var entryCount = Math.min(parseInt(s, 10), 5); // 111 222 333 444 555: atomic number of each atom on the list result.atomicNumbers = []; var start = 11; for (var i = 0; i < entryCount; ++i) { start += 4 * i; s = line.substr(start, 3); var atomicNumber = parseInt(s, 10); if (atomicNumber) result.atomicNumbers.push(atomicNumber); } return result; }, /** * Analysis a line of properties block in CTAB. * @param {String} line * @param {Array} atomInfos Atom info array read from atom block * @param {Array} sgroupInfos Sgroup info that may be filled in property list. * @return {Hash} Info about this property. 
* @private */ analysisPropertyLine: function(line, atomInfos, sgroupInfos) { var result = {}; // generally the line has a "M XXXnnx aaa" start, but A aaa, V aaa, G aaa or S SKPnnn is also legal. var slead = line.substr(0, 1); result.leading = slead; if (slead == 'M') { var spropId = line.substr(3, 3).trim(); result.prop = spropId; var atomIndex, sgIndex, sgInfo; if (spropId === 'END') // M END { return result; } if (['CHG', 'RAD', 'ISO'].indexOf(spropId) >= 0) // these tags may contain multiple entries in a line { var entryCount = parseInt(line.substr(6, 3).trim()); result.entryCount = entryCount; result.entries = []; var propName = (spropId === 'CHG')? 'charge': (spropId === 'RAD')? 'radical': 'massNumber'; var currPos = 10; for (var i = 0; i < entryCount; ++i) { var atomIndex = parseInt(line.substr(currPos, 3).trim(), 10) - 1; var propValue = parseInt(line.substr(currPos + 4, 3).trim(), 10); //console.log(atomIndex, propName, propValue, atomInfos[atomIndex]); currPos += 8; result.entries.push({'atomIndex': atomIndex, propName: propValue}); atomInfos[atomIndex][propName] = propValue; } //console.log(atomInfos); /* var satom = line.substr(10, 3).trim(); atomIndex = parseInt(satom, 10) - 1; result.atomIndex = atomIndex; */ } else if (['STY', 'SLB'].indexOf(spropId) >= 0) { sgIndex = parseInt(line.substr(10, 3), 10) - 1; sgInfo = sgroupInfos[sgIndex]; } else if (['SAL', 'SBL', 'SMT', 'SCL', 'SAP', 'SBV'].indexOf(spropId) >= 0) { sgIndex = parseInt(line.substr(7, 3), 10) - 1; sgInfo = sgroupInfos[sgIndex]; } /* var svalues = []; for (var i = 0; i < entryCount; ++i) { var svalue = line.substr(14 + i * 4, 3).trim(); svalues.push(svalue); } */ var valueStart = 14; // start position of actual infos switch (spropId) { /* case 'CHG': // charge { result.charge = parseInt(line.substr(valueStart, 3).trim(), 10); atomInfos[atomIndex].charge = result.charge; break; } case 'RAD': // radical { result.radical = parseInt(line.substr(valueStart, 3).trim(), 10); 
atomInfos[atomIndex].radical = Kekule.IO.MdlUtils.mdlRadicalToKekule(result.radical); break; } case 'ISO': // isotope { result.massNumber = parseInt(line.substr(valueStart, 3).trim(), 10); atomInfos[atomIndex].massNumber = result.massNumber; break; } */ case 'ALS': // atom list { var atomListInfo = {}; var satom = line.substr(7, 3).trim(); atomListInfo.atomIndex = parseInt(satom, 10) - 1; result.atomIndex = atomListInfo.atomIndex; //var count = parseInt(Math.min(line.substr(10, 3), 16)); var count = parseInt(line.substr(10, 3)); // allow more than 16 (unstandard data) var s = line.substr(14, 1); atomListInfo.isAllowList = (s.toLowerCase() == 'f'); atomListInfo.symbols = []; for (var i = 0; i < count; ++i) { s = line.substr(16 + i * 4, 4).trim(); atomListInfo.symbols.push(s); } result.atomListInfo = atomListInfo; // may be there is ALS line for atomIndex before... var oldAtomListInfo = atomInfos[atomListInfo.atomIndex].atomListInfo; if (!oldAtomListInfo) atomInfos[atomListInfo.atomIndex].atomListInfo = atomListInfo; else // merge old and new { if (oldAtomListInfo.isAllowList !== atomListInfo.isAllowList) // can not merge, overwrite atomInfos[atomListInfo.atomIndex].atomListInfo = atomListInfo; else // merge { oldAtomListInfo.symbols = Kekule.ArrayUtils.pushUnique(oldAtomListInfo.symbols, atomListInfo.symbols); } } break; } /* case 'APO': // attach point for RGroup, M APOnn2 aaa vvv, ignored currently case 'AAL': // Atom Attachment Order, M AAL aaann2 111 v1v 222 v2v, ignored currently case 'RGP': // Rgroup Label Location, M RGPnn8 aaa rrr, ignored case 'LOG': // Rgroup Logic, Unsatisfied Sites, Range of Occurrence. ignored case 'SST': // Sgroup Subtype, M SSTnn8 sss ttt, ignored case 'SCN': // Sgroup Connectivity, M SCNnn8 sss ttt, ignored case 'SDS': // Sgroup Expansion, M SDS EXPn15 sss ..., ignored case 'SPA': // Multiple Group Parent Atom List, M SPA sssn15 aaa ..., ignored // ... 
a packs of Sgroup properties are ignored { break; } */ case 'STY': // Sgroup type, M STYnn8 sss ttt, only handle SUP (abbreviation Sgroup) { //var sgIndex = parseInt(line.substr(10, 3), 10) - 1; var sgType = line.substr(14, 3); if (sgType == 'SUP') { sgroupInfos[sgIndex] = {'sgType': 'SUP'}; } break; } case 'SLB': // Sgroup Labels, M SLBnn8 sss vvv { //var sgIndex = parseInt(line.substr(10, 3), 10) - 1; //var sgInfo = sgroupInfos[sgIndex]; if (sgInfo) sgInfo.labelId = line.substr(14).trim(); break; } case 'SAL': // Sgroup Atom List, M SAL sssn15 aaa ... { //var sgIndex = parseInt(line.substr(7, 3), 10) - 1; //var sgInfo = sgroupInfos[sgIndex]; if (sgInfo) { //var atomCount = parseInt(Math.min(line.substr(10, 3), 15)); var atomCount = parseInt(line.substr(10, 3)); // allow non-standard format (more than 15) if (!sgInfo.atomIndexes) // maybe sg atomIndexes is already created when file has more than one SAL lines sgInfo.atomIndexes = []; for (var i = 0; i < atomCount; ++i) { var s = line.substr(14 + i * 4, 3); var atomIndex = parseInt(s) - 1; sgInfo.atomIndexes.push(atomIndex); } //console.log('SAL', atomCount, sgInfo.atomIndexes); } break; } case 'SBL': // Sgroup Bond List, M SBL sssn15 bbb ... // bond connect to group { //var sgIndex = parseInt(line.substr(7, 3), 10) - 1; //var sgInfo = sgroupInfos[sgIndex]; if (sgInfo && (sgInfo.sgType == 'SUP')) { //var bondCount = parseInt(Math.min(line.substr(10, 3), 15)); var bondCount = parseInt(line.substr(10, 3)); // allow more than 15 one (unstandard when reading) if (!sgInfo.crossBondIndexes) // maybe sg xbond indexes is already created when file has more than one SBL lines sgInfo.crossBondIndexes = []; for (var i = 0; i < bondCount; ++i) { var s = line.substr(14 + i * 4, 3); var bondIndex = parseInt(s) - 1; sgInfo.crossBondIndexes.push(bondIndex); } } break; } case 'SMT': // Sgroup Subscript, M SMT sss m... 
{ //var sgIndex = parseInt(line.substr(7, 3), 10) - 1; //var sgInfo = sgroupInfos[sgIndex]; if (sgInfo) { if (sgInfo.sgType == 'SUP') sgInfo.label = line.substr(11).trim(); else sgInfo.subscript = line.substr(11).trim(); } break; } case 'SCL': // Abbreviation Sgroup Class, M SCL sss d... { if (sgInfo) { sgInfo.sgClass = line.substr(11).trim(); } break; } case 'SBV': // Abbreviation Sgroup Bond and Vector Information, M SBV sss bb1 x1 y1 { if (sgInfo) { var vecBondIndex = parseInt(line.substr(11, 3), 10) - 1; if (!sgInfo.bondVectors) sgInfo.bondVectors = []; sgInfo.bondVectors[vecBondIndex] = { 'bondIndex': vecBondIndex, 'x': parseFloat(line.substr(14, 10)), 'y': parseFloat(line.substr(24, 10)) }; var sz = line.substr(34, 10).trim(); if (sz) { sgInfo.bondVectors[vecBondIndex].z = parseFloat(sz) || 0; } } break; } /* case 'SAP': // Abbreviation Sgroup Attachment Point, M SAP sssnn6 iii ooo cc, how to handled? { break; } case '$3D': // M $3Dnnn, 3D property, how to handled? { break; } case 'END': // end of block { // do nothing break; } */ } } else if (slead === 'A') // atom alias in ISIS Draw { var currPos = 3; var atomIndex = parseInt(line.substr(currPos, 3).trim(), 10) - 1; //result.prop = 'A'; result.atomIndex = atomIndex; } else if (slead === 'V') // atom value in ISIS Draw, ignored { } else if (slead === 'G') // TODO: Group abbreviation in ISIS Draw, currently ignored { } else if (slead === 'S') // S SKPnnn, skip tag { var spropId = line.substr(3, 3).trim(); result.prop = spropId; result.count = parseInt(line.substr(6, 3), 10); } return result; } }); /** * Class to write MDL 3000 Connection Table block. 
/**
 * Writer that outputs a MDL V2000 connection table (CTAB) block.
 * Note: the string helpers lpad/rpad used below are not defined in this file;
 * they are assumed to be provided on strings by another part of the library.
 * @class
 * @augments Kekule.IO.MdlBlockWriter
 * @private
 */
Kekule.IO.Mdl2kCTabWriter = Class.create(Kekule.IO.MdlBlockWriter,
/** @lends Kekule.IO.Mdl2kCTabWriter# */
{
	/** @private */
	CLASS_NAME: 'Kekule.IO.Mdl2kCTabWriter',
	/**
	 * @param {Int} coordMode Value from Kekule.CoordMode, default is UNKNOWN
	 *   (the coord mode of molecule info will be used then).
	 * @constructs
	 */
	initialize: function(/*$super, */coordMode)
	{
		this.tryApplySuper('initialize')  /* $super() */;
		this.setCoordMode(coordMode || Kekule.CoordMode.UNKNOWN);
	},
	/** @private */
	initProperties: function()
	{
		this.defineProp('coordMode', {'dataType': DataType.INT, 'defaultValue': Kekule.CoordMode.UNKNOWN});
	},
	/** @private */
	doWriteBlock: function(obj, textBuffer)
	{
		Kekule.IO.MdlUtils.assertIlegalForCtabOutput(obj);
		return this.outputCtab(obj, textBuffer);
	},
	/**
	 * Output atoms and bonds in molecule to text data.
	 * Lines are written in order: count line, atom block, bond block, atom property
	 * lines, sub-group property lines and the "M  END" line.
	 * @param {Kekule.StructureFragment} mol
	 * @param {Kekule.TextLinesBuffer} textBuffer
	 * @private
	 */
	outputCtab: function(mol, textBuffer)
	{
		var atomPropLines = [];
		var molInfo = Kekule.IO.MdlStructureUtils.getMoleculeCtabStructureInfo(mol);
		// decide coordMode, an explicitly set one overrides the mode of molecule
		var coordMode = this.getCoordMode();
		if (coordMode != Kekule.CoordMode.UNKNOWN)
			molInfo.coordMode = coordMode;
		// count line
		textBuffer.writeLine(this.generateCountLine(molInfo));
		// atom block, property lines of atoms are collected into atomPropLines
		this.outputAtomBlock(mol, molInfo, textBuffer, atomPropLines);
		// bond block
		this.outputBondBlock(mol, molInfo, textBuffer);
		// property lines
		for (var i = 0, l = atomPropLines.length; i < l; ++i)
			textBuffer.writeLine(atomPropLines[i]);
		// subgroup property lines
		this.outputSubgroupsPropLines(mol, molInfo, textBuffer);
		// property line end
		textBuffer.writeLine(Kekule.IO.Mdl2kUtils.getCtabPropLineTag('END'));
	},
	/**
	 * Generate count line string of molecule.
	 * Line format: aaabbblllfffcccsssxxxrrrpppiiimmmvvvvvv
	 * @param {Hash} molInfo Info returned by {@link Kekule.IO.MdlStructureUtils.getMoleculeCtabStructureInfo}.
	 * @returns {String}
	 * @private
	 */
	generateCountLine: function(molInfo)
	{
		return Kekule.IO.MdlStructureUtils.generateClassicStyleCountLine(molInfo, Kekule.IO.MdlVersion.V2000);
	},
	/**
	 * Output atom block to textBuffer.
	 * Isotope, charge, radical and so on also need to output extra property line,
	 * those lines are pushed into atomPropLines param.
	 * @private
	 */
	outputAtomBlock: function(mol, molInfo, textBuffer, atomPropLines)
	{
		var atoms = molInfo.atoms;
		for (var i = 0, l = atoms.length; i < l; ++i)
			textBuffer.writeLine(this.generateAtomLine(i, molInfo.coordMode, atoms[i], atomPropLines));
	},
	/**
	 * Generate a line about atom in atom block.
	 * Line format: xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz aaaddcccssshhhbbbvvvHHHrrriiimmmnnneee
	 * Isotope, charge, radical and so on also need to output extra property line,
	 * those lines are pushed into atomPropLines param (order: ALS, ISO, CHG, RAD).
	 * @param {Int} index Zero based index of atom.
	 * @param {Int} coordMode
	 * @param {Object} atom
	 * @param {Array} atomPropLines
	 * @returns {String}
	 * @private
	 */
	generateAtomLine: function(index, coordMode, atom, atomPropLines)
	{
		var U = Kekule.IO.Mdl2kUtils;
		var cf = U.coordToStr;  // coordinate convert function
		var sAtomIndex = (index + 1).toString().lpad(3);
		var propLine;

		// xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz: x,y,z-coord
		var coord = (coordMode == Kekule.CoordMode.COORD3D)? atom.getAbsCoord3D(): atom.getAbsCoord2D();
		var s = cf(coord.x).lpad(10) + cf(coord.y).lpad(10) + cf(coord.z).lpad(10);

		// aaa: atom symbol
		s += ' ' + Kekule.IO.MdlStructureUtils.getAtomTypeStr(atom, true).rpad(3);  // 2k mode
		if (Kekule.IO.MdlStructureUtils.isNodeVariableAtom(atom))  // atom list need additional property line
		{
			// M  ALS aaannn e 11112222333344445555...
			var sAtomList = U.getAtomListPropLineValue(atom);
			if (sAtomList)
				atomPropLines.push(U.getCtabPropLineTag('ALS') + ' ' + sAtomIndex + sAtomList);
		}

		// dd: mass difference
		if (atom.getMassNumber && atom.getMassNumber())
		{
			// the node has massNumber property must have isotope
			var massDiff = U.getMassDiff(atom.getIsotope()) || 0;
			// Only -3..4 is valid in the two column dd field. Other values are written
			// as 0 to keep the columns aligned, the M  ISO line below holds the real mass.
			if (massDiff < -3 || massDiff > 4)
				massDiff = 0;
			s += massDiff.toString().lpad(2);
			// M  ISOnn8 aaa vvv
			propLine = U.generateCtabPropLine([sAtomIndex, atom.getMassNumber().toString().lpad(3)], 'ISO', 1);
			atomPropLines.push(propLine);
		}
		else
			s += '0'.lpad(2);

		// ccc: charge
		var charge = atom.getCharge? atom.getCharge(): null;
		var radical = atom.getRadical? atom.getRadical(): null;
		s += U.chargeOrRadicalToMdlAtomLineValue(charge, radical).toString().lpad(3);
		if (charge)
		{
			// M  CHGnn8 aaa vvv ...
			propLine = U.generateCtabPropLine([sAtomIndex, charge.toString().lpad(3)], 'CHG', 1);
			atomPropLines.push(propLine);
		}
		if (radical)
		{
			// M  RADnn8 aaa vvv
			propLine = U.generateCtabPropLine(
				[sAtomIndex, Kekule.IO.MdlUtils.kekuleRadicalToMdl(radical).toString().lpad(3)], 'RAD', 1);
			atomPropLines.push(propLine);
		}

		// sss: atom stereo parity
		var parity = atom.getParity? (atom.getParity() || 0): 0;
		s += parity.toString().lpad(3);

		// hhh: hydrogen count + 1, used for query
		var hcount = atom.getExplicitHydrogenCount? atom.getExplicitHydrogenCount(): null;
		var shcount = Kekule.ObjUtils.isUnset(hcount)? '0': (hcount + 1).toString();
		s += shcount.lpad(3);

		// The remaining eight fields are all output as 0:
		// bbb (stereo care box), vvv (valence), HHH (H0 designator), rrr, iii (not used),
		// mmm (atom-atom mapping number), nnn (inversion/retention flag), eee (exact change flag)
		var sfill = '0'.lpad(3);
		for (var i = 0; i < 8; ++i)
			s += sfill;
		return s;
	},
	/**
	 * Output bond block to textBuffer.
	 * @private
	 */
	outputBondBlock: function(mol, molInfo, textBuffer)
	{
		var bonds = molInfo.bonds;
		for (var i = 0, l = bonds.length; i < l; ++i)
			textBuffer.writeLine(this.generateBondLine(i, bonds[i], molInfo.atoms));
	},
	/**
	 * Generate a line about bond in bond block.
	 * Line format: 111222tttsssxxxrrrccc
	 * @param {Int} index Zero based index of bond (not used in the line itself).
	 * @param {Object} bond
	 * @param {Array} atomList Atoms of molecule, used to look up atom numbers.
	 * @returns {String}
	 * @private
	 */
	generateBondLine: function(index, bond, atomList)
	{
		var sZero = '0'.lpad(3);
		// check bond stereo first, since it may cause the inverted order of atoms
		var stereoInfo = {};
		var sBondStereo = sZero;  // default when bond has no stereo/order information
		if (bond.getStereo && bond.getBondOrder)
		{
			stereoInfo = Kekule.IO.Mdl2kUtils.bondStereoToMdlBondLineValue(bond.getStereo(), bond.getBondOrder(), true);
			sBondStereo = stereoInfo.stereo.toString().lpad(3);
		}
		// 111222: first and second atom number
		var nodeGroup = Kekule.IO.MdlStructureUtils.splitConnectedNodes(bond);
		var sAtom1 = (atomList.indexOf(nodeGroup.primaryNodes[0]) + 1).toString().lpad(3);
		var sAtom2 = (atomList.indexOf(nodeGroup.primaryNodes[1]) + 1).toString().lpad(3);
		var s = (stereoInfo && stereoInfo.inverted)? (sAtom2 + sAtom1): (sAtom1 + sAtom2);
		// ttt: bond type
		if (bond.getBondOrder)
			s += Kekule.IO.MdlUtils.kekuleBondOrderToMdlType(bond.getBondOrder()).toString().lpad(3);
		else
			s += sZero;
		// sss: bond stereo
		s += sBondStereo;
		// xxx: not used
		s += sZero;
		// rrr: bond topology, 0 = Either, 1 = Ring, 2 = Chain, SSS queries only, ignored
		s += sZero;
		// ccc: reacting center status, for reaction and query, ignored
		s += sZero;
		return s;
	},
	/**
	 * Output property line about subgroups to textBuffer.
	 * @private
	 */
	outputSubgroupsPropLines: function(mol, molInfo, textBuffer)
	{
		// sub-group (super-atom)
		var subGroups = molInfo.subGroups;
		for (var i = 0, l = subGroups.length; i < l; ++i)
		{
			var lines = this.generateSgroupLines(i, subGroups[i], molInfo);
			for (var j = 0, k = lines.length; j < k; ++j)
				textBuffer.writeLine(lines[j]);
		}
	},
	/**
	 * Generate lines about sub-group in Sgroup property line block.
	 * Lines are generated in order: STY, SLB, SMT (if a label can be found), SAL, SBL, SBV.
	 * @param {Int} index Zero based index of sub-group.
	 * @param {Object} subGroup
	 * @param {Hash} molInfo
	 * @returns {Array} Array of line strings.
	 * @private
	 */
	generateSgroupLines: function(index, subGroup, molInfo)
	{
		var U = Kekule.IO.Mdl2kUtils;
		var cf = U.coordToStr;
		var result = [];
		var i, l, pos;
		var sIndex = (index + 1).toString().lpad(3);

		// STY, type: M  STYnn8 sss ttt ...
		result.push(U.generateCtabPropLine([sIndex, 'SUP'], 'STY', 1));
		// SLB, Sgroup Labels, M  SLBnn8 sss vvv
		result.push(U.generateCtabPropLine([sIndex, sIndex], 'SLB', 1));
		// SMT, Sgroup Subscript, M  SMT sss m...
		var slabel = (subGroup.getAbbr && subGroup.getAbbr())
			|| (subGroup.getFormulaText && subGroup.getFormulaText())
			|| (subGroup.getName && subGroup.getName());
		if (slabel)
			result.push(U.generateCtabPropLine([sIndex, slabel], 'SMT'));

		// SAL, Sgroup Atom List, M  SAL sssn15 aaa ...
		var atomIndexes = [];
		var leafAtoms = subGroup.getLeafNodes();
		for (i = 0, l = leafAtoms.length; i < l; ++i)
		{
			pos = molInfo.atoms.indexOf(leafAtoms[i]);
			if (pos >= 0)
				atomIndexes.push((pos + 1).toString().lpad(3));
		}
		atomIndexes = atomIndexes.sort();
		// maximum of 15 atoms are allowed in one line
		var atomIndexGroups = Kekule.ArrayUtils.divide(atomIndexes, 15);
		for (i = 0, l = atomIndexGroups.length; i < l; ++i)
		{
			var subAtomIndexes = atomIndexGroups[i];
			result.push(U.getCtabPropLineTag('SAL') + ' ' + sIndex
				+ subAtomIndexes.length.toString().lpad(3) + ' ' + subAtomIndexes.join(' '));
		}

		// SBL, Sgroup xbond List, M  SBL sssn15 bbb
		// SBV, Abbreviation Sgroup Bond and Vector Information, M  SBV sss bb1 x1 y1
		var xbonds = subGroup.getCrossConnectors();
		var xbondIndexes = [];
		var xbondVectors = [];
		for (i = 0, l = xbonds.length; i < l; ++i)
		{
			pos = molInfo.bonds.indexOf(xbonds[i]);
			if (pos < 0)
				continue;
			xbondIndexes.push((pos + 1).toString().lpad(3));
			// vector: bondIndex, x, y (, z)
			var vectorArray = [pos + 1];
			var endNodes = Kekule.IO.MdlStructureUtils.splitConnectedNodes(xbonds[i]).primaryNodes;
			if (endNodes.length == 2)
			{
				var vector;
				// each vector value occupies 10 columns (the reader of this file
				// reads them by fixed width), so all values are padded
				if (molInfo.coordMode == Kekule.CoordMode.COORD2D)
				{
					vector = Kekule.CoordUtils.substract(endNodes[0].getAbsCoord2D(), endNodes[1].getAbsCoord2D());
					vectorArray = vectorArray.concat([cf(vector.x).lpad(10), cf(vector.y).lpad(10)]);
				}
				else
				{
					vector = Kekule.CoordUtils.substract(endNodes[0].getAbsCoord3D(), endNodes[1].getAbsCoord3D());
					vectorArray = vectorArray.concat([cf(vector.x).lpad(10), cf(vector.y).lpad(10), cf(vector.z).lpad(10)]);
				}
			}
			xbondVectors.push(vectorArray);
		}
		xbondIndexes = xbondIndexes.sort();
		// SBL, maximum of 15 bonds are allowed in one line
		var xbondIndexGroups = Kekule.ArrayUtils.divide(xbondIndexes, 15);
		for (i = 0, l = xbondIndexGroups.length; i < l; ++i)
		{
			var subXbondIndexes = xbondIndexGroups[i];
			result.push(U.getCtabPropLineTag('SBL') + ' ' + sIndex
				+ subXbondIndexes.length.toString().lpad(3) + ' ' + subXbondIndexes.join(' '));
		}
		// SBV, one line for each cross bond
		for (i = 0, l = xbondVectors.length; i < l; ++i)
		{
			var vectorIndex = xbondVectors[i].shift();  // first item is the bond number, the rest are coord strings
			result.push(U.getCtabPropLineTag('SBV') + ' ' + sIndex + ' '
				+ vectorIndex.toString().lpad(3) + xbondVectors[i].join(''));
		}
		return result;
	}
});