UNPKG

molstar

Version:

A comprehensive macromolecular library.

322 lines (321 loc) 18.8 kB
"use strict";
/**
 * Copyright (c) 2017-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author Zepei Xu <xuzepei19950617@gmail.com>
 * @author Alexander Rose <alexander.rose@weirdbyte.de>
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.parseMol2 = void 0;
var tslib_1 = require("tslib");
// NOTES
// To create an undefined string column, use
// undefStr = UndefinedColumn(molecule.num_atoms, ColumnType.str)
// and not
// const undefPooledStr = UndefinedColumn(molecule.num_atoms, ColumnType.pooledStr);
// because the latter actually returns a column of zeros.
var db_1 = require("../../../mol-data/db");
var tokenizer_1 = require("../common/text/tokenizer");
var token_1 = require("../common/text/column/token");
var result_1 = require("../result");
var mol_task_1 = require("../../../mol-task");
var skipWhitespace = tokenizer_1.Tokenizer.skipWhitespace;
var eatValue = tokenizer_1.Tokenizer.eatValue;
var markLine = tokenizer_1.Tokenizer.markLine;
var getTokenString = tokenizer_1.Tokenizer.getTokenString;
var readLine = tokenizer_1.Tokenizer.readLine;

/**
 * Creates a molecule record with every field set to its empty value
 * (empty strings for text fields, 0 for counts).
 */
function createEmptyMolecule() {
    return {
        mol_name: '',
        num_atoms: 0,
        num_bonds: 0,
        num_subst: 0,
        num_feat: 0,
        num_sets: 0,
        mol_type: '',
        charge_type: '',
        status_bits: '',
        mol_comment: ''
    };
}

/**
 * Bundles the shared tokenizer, a fresh empty molecule record and the runtime
 * context into the per-structure parsing state used by the handlers below.
 */
function State(tokenizer, runtimeCtx) {
    return {
        tokenizer: tokenizer,
        molecule: createEmptyMolecule(),
        runtimeCtx: runtimeCtx
    };
}

// Shared separator for splitting a record line into its whitespace-delimited fields.
var reWhitespace = /\s+/g;

/**
 * Advances to the next '@<TRIPOS>MOLECULE' record and fills `state.molecule`.
 *
 * The line after the header is stored as the name, the following line is split
 * on whitespace into the five counts, and up to four further lines are stored
 * as mol_type, charge_type, status_bits and mol_comment. Reading of those
 * optional lines stops as soon as one of them starts with '@<TRIPOS>', i.e. the
 * next record has begun; the remaining fields keep their empty defaults.
 */
function handleMolecule(state) {
    var tokenizer = state.tokenizer;
    var molecule = state.molecule;
    while (getTokenString(tokenizer) !== '@<TRIPOS>MOLECULE' && tokenizer.position < tokenizer.data.length) {
        markLine(tokenizer);
    }
    markLine(tokenizer);
    molecule.mol_name = getTokenString(tokenizer);
    markLine(tokenizer);
    var values = getTokenString(tokenizer).trim().split(reWhitespace);
    // Explicit radix 10, matching the integer parsing in handleCrysin.
    // Missing fields yield NaN.
    molecule.num_atoms = parseInt(values[0], 10);
    molecule.num_bonds = parseInt(values[1], 10);
    molecule.num_subst = parseInt(values[2], 10);
    molecule.num_feat = parseInt(values[3], 10);
    molecule.num_sets = parseInt(values[4], 10);
    markLine(tokenizer);
    var mol_type = getTokenString(tokenizer);
    if (mol_type.startsWith('@<TRIPOS>'))
        return;
    molecule.mol_type = mol_type;
    markLine(tokenizer);
    var charge_type = getTokenString(tokenizer);
    if (charge_type.startsWith('@<TRIPOS>'))
        return;
    molecule.charge_type = charge_type;
    markLine(tokenizer);
    var status_bits = getTokenString(tokenizer);
    if (status_bits.startsWith('@<TRIPOS>'))
        return;
    molecule.status_bits = status_bits;
    markLine(tokenizer);
    var mol_comment = getTokenString(tokenizer);
    if (mol_comment.startsWith('@<TRIPOS>'))
        return;
    molecule.mol_comment = mol_comment;
}

/**
 * Advances to the '@<TRIPOS>ATOM' record and tokenizes `molecule.num_atoms`
 * atom lines into columns.
 *
 * The number of columns is taken from the first line after the header and is
 * assumed to hold for every atom line. The first ten columns are stored
 * (atom_id, atom_name, x, y, z, atom_type, subst_id, subst_name, charge,
 * status_bit); any further columns are consumed but not stored. Columns 6-10
 * are replaced by undefined columns when the first line does not contain them.
 *
 * @returns a promise resolving to the atoms table (`count` plus one column per field).
 */
function handleAtoms(state) {
    return tslib_1.__awaiter(this, void 0, void 0, function () {
        var tokenizer, molecule, initialTokenizerPosition, initialTokenizerLineNumber, firstLine, firstLineArray, columnCount, atom_idTokens, atom_nameTokens, xTokens, yTokens, zTokens, atom_typeTokens, subst_idTokens, subst_nameTokens, chargeTokens, status_bitTokens, undefFloat, undefInt, undefStr, length, linesAlreadyRead, ret;
        return tslib_1.__generator(this, function (_a) {
            switch (_a.label) {
                case 0:
                    tokenizer = state.tokenizer;
                    molecule = state.molecule;
                    // skip empty lines and '@<TRIPOS>ATOM'
                    while (getTokenString(tokenizer) !== '@<TRIPOS>ATOM' && tokenizer.position < tokenizer.data.length) {
                        markLine(tokenizer);
                    }
                    // Peek at the first atom line to count its columns; the
                    // position is restored below so the line is parsed again.
                    initialTokenizerPosition = tokenizer.position;
                    initialTokenizerLineNumber = tokenizer.lineNumber;
                    firstLine = readLine(tokenizer);
                    firstLineArray = firstLine.trim().split(reWhitespace);
                    columnCount = firstLineArray.length;
                    // Builders are created with num_atoms * 2 entries — presumably one
                    // start and one end offset per token; confirm against TokenBuilder.
                    atom_idTokens = tokenizer_1.TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
                    atom_nameTokens = tokenizer_1.TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
                    xTokens = tokenizer_1.TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
                    yTokens = tokenizer_1.TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
                    zTokens = tokenizer_1.TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
                    atom_typeTokens = tokenizer_1.TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
                    subst_idTokens = tokenizer_1.TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
                    subst_nameTokens = tokenizer_1.TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
                    chargeTokens = tokenizer_1.TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
                    status_bitTokens = tokenizer_1.TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
                    undefFloat = db_1.Column.Undefined(molecule.num_atoms, db_1.Column.Schema.float);
                    undefInt = db_1.Column.Undefined(molecule.num_atoms, db_1.Column.Schema.int);
                    undefStr = db_1.Column.Undefined(molecule.num_atoms, db_1.Column.Schema.str);
                    tokenizer.position = initialTokenizerPosition;
                    tokenizer.lineNumber = initialTokenizerLineNumber;
                    length = tokenizer.length;
                    linesAlreadyRead = 0;
                    return [4 /*yield*/, (0, mol_task_1.chunkedSubtask)(state.runtimeCtx, 100000, void 0, function (chunkSize) {
                            // Never read past the declared atom count.
                            var linesToRead = Math.min(molecule.num_atoms - linesAlreadyRead, chunkSize);
                            for (var i = 0; i < linesToRead; i++) {
                                for (var j = 0; j < columnCount; j++) {
                                    skipWhitespace(tokenizer);
                                    tokenizer.tokenStart = tokenizer.position;
                                    eatValue(tokenizer);
                                    switch (j) {
                                        case 0:
                                            tokenizer_1.TokenBuilder.addUnchecked(atom_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                                            break;
                                        case 1:
                                            tokenizer_1.TokenBuilder.addUnchecked(atom_nameTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                                            break;
                                        case 2:
                                            tokenizer_1.TokenBuilder.addUnchecked(xTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                                            break;
                                        case 3:
                                            tokenizer_1.TokenBuilder.addUnchecked(yTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                                            break;
                                        case 4:
                                            tokenizer_1.TokenBuilder.addUnchecked(zTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                                            break;
                                        case 5:
                                            tokenizer_1.TokenBuilder.addUnchecked(atom_typeTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                                            break;
                                        case 6:
                                            tokenizer_1.TokenBuilder.addUnchecked(subst_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                                            break;
                                        case 7:
                                            tokenizer_1.TokenBuilder.addUnchecked(subst_nameTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                                            break;
                                        case 8:
                                            tokenizer_1.TokenBuilder.addUnchecked(chargeTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                                            break;
                                        case 9:
                                            tokenizer_1.TokenBuilder.addUnchecked(status_bitTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                                            break;
                                        // Columns beyond the tenth are consumed but not stored.
                                    }
                                }
                            }
                            linesAlreadyRead += linesToRead;
                            return linesToRead;
                        }, function (ctx) { return ctx.update({ message: 'Parsing...', current: tokenizer.position, max: length }); })];
                case 1:
                    _a.sent();
                    ret = {
                        count: molecule.num_atoms,
                        atom_id: (0, token_1.TokenColumnProvider)(atom_idTokens)(db_1.Column.Schema.int),
                        atom_name: (0, token_1.TokenColumnProvider)(atom_nameTokens)(db_1.Column.Schema.str),
                        x: (0, token_1.TokenColumnProvider)(xTokens)(db_1.Column.Schema.float),
                        y: (0, token_1.TokenColumnProvider)(yTokens)(db_1.Column.Schema.float),
                        z: (0, token_1.TokenColumnProvider)(zTokens)(db_1.Column.Schema.float),
                        atom_type: columnCount > 5 ? (0, token_1.TokenColumnProvider)(atom_typeTokens)(db_1.Column.Schema.str) : undefStr,
                        subst_id: columnCount > 6 ? (0, token_1.TokenColumnProvider)(subst_idTokens)(db_1.Column.Schema.int) : undefInt,
                        subst_name: columnCount > 7 ? (0, token_1.TokenColumnProvider)(subst_nameTokens)(db_1.Column.Schema.str) : undefStr,
                        charge: columnCount > 8 ? (0, token_1.TokenColumnProvider)(chargeTokens)(db_1.Column.Schema.float) : undefFloat,
                        status_bit: columnCount > 9 ? (0, token_1.TokenColumnProvider)(status_bitTokens)(db_1.Column.Schema.str) : undefStr,
                    };
                    return [2 /*return*/, ret];
            }
        });
    });
}

/**
 * Advances to the '@<TRIPOS>BOND' record and tokenizes `molecule.num_bonds`
 * bond lines into columns.
 *
 * The number of columns is taken from the first line after the header and is
 * assumed to hold for every bond line. The first five columns are stored
 * (bond_id, origin_atom_id, target_atom_id, bond_type, status_bits); any
 * further columns are consumed but not stored. status_bits is an undefined
 * column when the first line has fewer than five columns.
 *
 * @returns a promise resolving to the bonds table (`count` plus one column per field).
 */
function handleBonds(state) {
    return tslib_1.__awaiter(this, void 0, void 0, function () {
        var tokenizer, molecule, initialTokenizerPosition, initialTokenizerLineNumber, firstLine, firstLineArray, columnCount, bond_idTokens, origin_bond_idTokens, target_bond_idTokens, bondTypeTokens, status_bitTokens, length, linesAlreadyRead, ret;
        return tslib_1.__generator(this, function (_a) {
            switch (_a.label) {
                case 0:
                    tokenizer = state.tokenizer;
                    molecule = state.molecule;
                    while (getTokenString(tokenizer) !== '@<TRIPOS>BOND' && tokenizer.position < tokenizer.data.length) {
                        markLine(tokenizer);
                    }
                    // Peek at the first bond line to count its columns; the
                    // position is restored below so the line is parsed again.
                    initialTokenizerPosition = tokenizer.position;
                    initialTokenizerLineNumber = tokenizer.lineNumber;
                    firstLine = readLine(tokenizer);
                    firstLineArray = firstLine.trim().split(reWhitespace);
                    columnCount = firstLineArray.length;
                    bond_idTokens = tokenizer_1.TokenBuilder.create(tokenizer.data, molecule.num_bonds * 2);
                    origin_bond_idTokens = tokenizer_1.TokenBuilder.create(tokenizer.data, molecule.num_bonds * 2);
                    target_bond_idTokens = tokenizer_1.TokenBuilder.create(tokenizer.data, molecule.num_bonds * 2);
                    bondTypeTokens = tokenizer_1.TokenBuilder.create(tokenizer.data, molecule.num_bonds * 2);
                    status_bitTokens = tokenizer_1.TokenBuilder.create(tokenizer.data, molecule.num_bonds * 2);
                    tokenizer.position = initialTokenizerPosition;
                    tokenizer.lineNumber = initialTokenizerLineNumber;
                    length = tokenizer.length;
                    linesAlreadyRead = 0;
                    return [4 /*yield*/, (0, mol_task_1.chunkedSubtask)(state.runtimeCtx, 100000, void 0, function (chunkSize) {
                            // Never read past the declared bond count.
                            var linesToRead = Math.min(molecule.num_bonds - linesAlreadyRead, chunkSize);
                            for (var i = 0; i < linesToRead; i++) {
                                for (var j = 0; j < columnCount; j++) {
                                    skipWhitespace(tokenizer);
                                    tokenizer.tokenStart = tokenizer.position;
                                    eatValue(tokenizer);
                                    switch (j) {
                                        case 0:
                                            tokenizer_1.TokenBuilder.addUnchecked(bond_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                                            break;
                                        case 1:
                                            tokenizer_1.TokenBuilder.addUnchecked(origin_bond_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                                            break;
                                        case 2:
                                            tokenizer_1.TokenBuilder.addUnchecked(target_bond_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                                            break;
                                        case 3:
                                            tokenizer_1.TokenBuilder.addUnchecked(bondTypeTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                                            break;
                                        case 4:
                                            // Only the fifth column is status_bits. Storing every
                                            // column with index >= 4 here would append more than one
                                            // token per bond line whenever extra columns are present,
                                            // so the column would no longer line up with bond rows.
                                            tokenizer_1.TokenBuilder.addUnchecked(status_bitTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                                            break;
                                        // Columns beyond the fifth are consumed but not stored.
                                    }
                                }
                            }
                            linesAlreadyRead += linesToRead;
                            return linesToRead;
                        }, function (ctx) { return ctx.update({ message: 'Parsing...', current: tokenizer.position, max: length }); })];
                case 1:
                    _a.sent();
                    ret = {
                        count: molecule.num_bonds,
                        bond_id: (0, token_1.TokenColumnProvider)(bond_idTokens)(db_1.Column.Schema.int),
                        origin_atom_id: (0, token_1.TokenColumnProvider)(origin_bond_idTokens)(db_1.Column.Schema.int),
                        target_atom_id: (0, token_1.TokenColumnProvider)(target_bond_idTokens)(db_1.Column.Schema.int),
                        bond_type: (0, token_1.TokenColumnProvider)(bondTypeTokens)(db_1.Column.Schema.str),
                        status_bits: columnCount > 4
                            ? (0, token_1.TokenColumnProvider)(status_bitTokens)(db_1.Column.Schema.str)
                            : db_1.Column.Undefined(molecule.num_bonds, db_1.Column.Schema.str),
                    };
                    return [2 /*return*/, ret];
            }
        });
    });
}

/**
 * Looks for an '@<TRIPOS>CRYSIN' record belonging to the current molecule.
 *
 * Scanning stops without a result when the next '@<TRIPOS>MOLECULE' header or
 * the end of the data is reached. Otherwise the line after the header is split
 * on whitespace and parsed as a, b, c, alpha, beta, gamma (floats) followed by
 * spaceGroup and setting (base-10 integers).
 *
 * @returns the parsed values, or undefined when no CRYSIN record was found.
 */
function handleCrysin(state) {
    var tokenizer = state.tokenizer;
    while (tokenizer.position < tokenizer.data.length) {
        var l = getTokenString(tokenizer);
        if (l === '@<TRIPOS>MOLECULE') {
            return;
        }
        else if (l === '@<TRIPOS>CRYSIN') {
            break;
        }
        else {
            markLine(tokenizer);
        }
    }
    if (tokenizer.position >= tokenizer.data.length)
        return;
    markLine(tokenizer);
    var values = getTokenString(tokenizer).trim().split(reWhitespace);
    return {
        a: parseFloat(values[0]),
        b: parseFloat(values[1]),
        c: parseFloat(values[2]),
        alpha: parseFloat(values[3]),
        beta: parseFloat(values[4]),
        gamma: parseFloat(values[5]),
        spaceGroup: parseInt(values[6], 10),
        setting: parseInt(values[7], 10),
    };
}

/**
 * Parses every structure in `data`: for each one, reads the molecule header,
 * the atoms, the bonds and an optional CRYSIN record, then skips ahead to the
 * next '@<TRIPOS>MOLECULE' header (or the end of the data).
 *
 * @returns a promise resolving to `ReaderResult.success({ name, structures })`.
 */
function parseInternal(ctx, data, name) {
    return tslib_1.__awaiter(this, void 0, void 0, function () {
        var tokenizer, structures, state, atoms, bonds, crysin, result;
        return tslib_1.__generator(this, function (_a) {
            switch (_a.label) {
                case 0:
                    tokenizer = (0, tokenizer_1.Tokenizer)(data);
                    ctx.update({ message: 'Parsing...', current: 0, max: data.length });
                    structures = [];
                    _a.label = 1;
                case 1:
                    // Loop head: one iteration per structure until the data is exhausted.
                    if (!(tokenizer.position < data.length)) return [3 /*break*/, 4];
                    state = State(tokenizer, ctx);
                    handleMolecule(state);
                    return [4 /*yield*/, handleAtoms(state)];
                case 2:
                    atoms = _a.sent();
                    return [4 /*yield*/, handleBonds(state)];
                case 3:
                    bonds = _a.sent();
                    crysin = handleCrysin(state);
                    structures.push({ molecule: state.molecule, atoms: atoms, bonds: bonds, crysin: crysin });
                    skipWhitespace(tokenizer);
                    // Skip any remaining records of this structure.
                    while (getTokenString(tokenizer) !== '@<TRIPOS>MOLECULE' && tokenizer.position < tokenizer.data.length) {
                        markLine(tokenizer);
                    }
                    return [3 /*break*/, 1];
                case 4:
                    result = { name: name, structures: structures };
                    return [2 /*return*/, result_1.ReaderResult.success(result)];
            }
        });
    });
}

/**
 * Creates a 'Parse MOL2' task that parses `data` when run.
 *
 * @param data the MOL2 text to parse.
 * @param name the name stored on the resulting file object.
 * @returns the task; its result is whatever parseInternal resolves to.
 */
function parseMol2(data, name) {
    var _this = this;
    return mol_task_1.Task.create('Parse MOL2', function (ctx) { return tslib_1.__awaiter(_this, void 0, void 0, function () {
        return tslib_1.__generator(this, function (_a) {
            switch (_a.label) {
                case 0: return [4 /*yield*/, parseInternal(ctx, data, name)];
                case 1: return [2 /*return*/, _a.sent()];
            }
        });
    }); });
}
exports.parseMol2 = parseMol2;