UNPKG

molstar

Version:

A comprehensive macromolecular library.

288 lines 16.8 kB
/**
 * Copyright (c) 2017-2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author Zepei Xu <xuzepei19950617@gmail.com>
 * @author Alexander Rose <alexander.rose@weirdbyte.de>
 */
import { __awaiter, __generator } from "tslib";
// NOTES
// To create an undefined string column, use
// undefStr = UndefinedColumn(molecule.num_atoms, ColumnType.str)
// but not
// const undefPooledStr = UndefinedColumn(molecule.num_atoms, ColumnType.pooledStr);
// because the latter actually returns a column of zeros
import { Column } from '../../../mol-data/db';
import { TokenBuilder, Tokenizer } from '../common/text/tokenizer';
import { TokenColumnProvider as TokenColumn } from '../common/text/column/token';
import { ReaderResult as Result } from '../result';
import { Task, chunkedSubtask } from '../../../mol-task';
var skipWhitespace = Tokenizer.skipWhitespace, eatValue = Tokenizer.eatValue, markLine = Tokenizer.markLine, getTokenString = Tokenizer.getTokenString, readLine = Tokenizer.readLine;

/** Prefix shared by every record-type indicator line handled in this file. */
var RECORD_PREFIX = '@<TRIPOS>';

/**
 * MOLECULE record lines that follow the counts line, in the order they are read.
 * Each one is optional: reading stops as soon as the next record indicator shows up.
 */
var OPTIONAL_MOLECULE_FIELDS = ['mol_type', 'charge_type', 'status_bits', 'mol_comment'];

/** Number of rows handed to `chunkedSubtask` as the initial chunk size. */
var INITIAL_CHUNK_SIZE = 100000;

var reWhitespace = /\s+/g;

/**
 * Creates a molecule record with every count set to 0 and every text field empty.
 *
 * @returns {object} a fresh, unshared molecule record
 */
function createEmptyMolecule() {
    return {
        mol_name: '',
        num_atoms: 0,
        num_bonds: 0,
        num_subst: 0,
        num_feat: 0,
        num_sets: 0,
        mol_type: '',
        charge_type: '',
        status_bits: '',
        mol_comment: ''
    };
}

/**
 * Bundles what the record handlers need: the shared tokenizer, a fresh empty
 * molecule record and the runtime context used for progress updates.
 *
 * @param tokenizer tokenizer positioned somewhere in the MOL2 text
 * @param runtimeCtx runtime context passed on to `chunkedSubtask`
 */
function State(tokenizer, runtimeCtx) {
    return {
        tokenizer: tokenizer,
        molecule: createEmptyMolecule(),
        runtimeCtx: runtimeCtx
    };
}

/**
 * Parses one field of the MOLECULE counts line as a base-10 integer.
 *
 * Only the first count is guaranteed to be on the line, so a missing (or
 * non-numeric) field yields 0 - the same default `createEmptyMolecule` uses -
 * instead of letting NaN leak into row counts and column sizes.
 *
 * @param {string|undefined} value raw field text
 * @returns {number} the parsed count, or 0 when it cannot be parsed
 */
function parseCount(value) {
    var count = parseInt(value, 10);
    return isNaN(count) ? 0 : count;
}

/**
 * Marks lines until the current token equals `record` or the end of the data
 * is reached. Does nothing when the current token already is `record`.
 *
 * @param tokenizer tokenizer to advance
 * @param {string} record full record indicator, e.g. '@<TRIPOS>ATOM'
 */
function skipToRecord(tokenizer, record) {
    while (getTokenString(tokenizer) !== record && tokenizer.position < tokenizer.data.length) {
        markLine(tokenizer);
    }
}

/**
 * Counts the whitespace-separated fields on the next line and then puts the
 * tokenizer's `position` and `lineNumber` back to where they were.
 *
 * @param tokenizer tokenizer positioned at the start of the line to inspect
 * @returns {number} number of fields found (1 for an empty line, because
 *   splitting an empty string still yields one element)
 */
function peekColumnCount(tokenizer) {
    var position = tokenizer.position;
    var lineNumber = tokenizer.lineNumber;
    var columnCount = readLine(tokenizer).trim().split(reWhitespace).length;
    tokenizer.position = position;
    tokenizer.lineNumber = lineNumber;
    return columnCount;
}

/**
 * Tokenizes `rowCount` rows of `columnCount` whitespace-separated values,
 * starting at the tokenizer's current position, in chunks via `chunkedSubtask`.
 *
 * The value in column `j` is recorded in `builders[j]`. Values in columns past
 * the end of `builders` are consumed but not recorded, so an unexpected extra
 * column cannot shift the rows of a known column.
 *
 * @param state parser state (tokenizer and runtime context are used)
 * @param {number} rowCount number of rows to read
 * @param {number} columnCount number of values to consume per row
 * @param {Array} builders token builders, indexed by column
 * @returns the promise returned by `chunkedSubtask`
 */
function readRecords(state, rowCount, columnCount, builders) {
    var tokenizer = state.tokenizer;
    var length = tokenizer.length;
    var rowsAlreadyRead = 0;
    return chunkedSubtask(state.runtimeCtx, INITIAL_CHUNK_SIZE, void 0, function (chunkSize) {
        var rowsToRead = Math.min(rowCount - rowsAlreadyRead, chunkSize);
        for (var i = 0; i < rowsToRead; i++) {
            for (var j = 0; j < columnCount; j++) {
                skipWhitespace(tokenizer);
                tokenizer.tokenStart = tokenizer.position;
                eatValue(tokenizer);
                if (j < builders.length) {
                    TokenBuilder.addUnchecked(builders[j], tokenizer.tokenStart, tokenizer.tokenEnd);
                }
            }
        }
        rowsAlreadyRead += rowsToRead;
        // Returning 0 once every row has been read is what ends the chunked loop.
        return rowsToRead;
    }, function (ctx) { return ctx.update({ message: 'Parsing...', current: tokenizer.position, max: length }); });
}

/**
 * Reads the '@<TRIPOS>MOLECULE' record into `state.molecule`: the name line,
 * the counts line, and then up to four optional lines (type, charge type,
 * status bits, comment).
 *
 * Returns early, leaving the remaining optional fields empty, when one of the
 * optional lines turns out to be the next record indicator; that indicator is
 * then still the tokenizer's current token for the next handler.
 *
 * @param state parser state; `state.molecule` is filled in place
 */
function handleMolecule(state) {
    var tokenizer = state.tokenizer, molecule = state.molecule;
    skipToRecord(tokenizer, '@<TRIPOS>MOLECULE');
    markLine(tokenizer);
    molecule.mol_name = getTokenString(tokenizer);
    markLine(tokenizer);
    var values = getTokenString(tokenizer).trim().split(reWhitespace);
    molecule.num_atoms = parseCount(values[0]);
    molecule.num_bonds = parseCount(values[1]);
    molecule.num_subst = parseCount(values[2]);
    molecule.num_feat = parseCount(values[3]);
    molecule.num_sets = parseCount(values[4]);
    for (var i = 0; i < OPTIONAL_MOLECULE_FIELDS.length; i++) {
        markLine(tokenizer);
        var value = getTokenString(tokenizer);
        if (value.startsWith(RECORD_PREFIX))
            return;
        molecule[OPTIONAL_MOLECULE_FIELDS[i]] = value;
    }
}

/**
 * Reads the '@<TRIPOS>ATOM' record: `molecule.num_atoms` rows whose column
 * count is taken from the first row. The first five columns (id, name, x, y,
 * z) are always exposed as token columns; type, substructure id/name, charge
 * and status bit fall back to undefined columns when the rows are too short.
 *
 * @param state parser state positioned at or before the ATOM record
 * @returns a promise resolving to the atoms table
 */
function handleAtoms(state) {
    return __awaiter(this, void 0, void 0, function () {
        var tokenizer, molecule, columnCount, atom_idTokens, atom_nameTokens, xTokens, yTokens, zTokens, atom_typeTokens, subst_idTokens, subst_nameTokens, chargeTokens, status_bitTokens, undefFloat, undefInt, undefStr, ret;
        return __generator(this, function (_a) {
            switch (_a.label) {
                case 0:
                    tokenizer = state.tokenizer, molecule = state.molecule;
                    // skip empty lines and '@<TRIPOS>ATOM'
                    skipToRecord(tokenizer, '@<TRIPOS>ATOM');
                    columnCount = peekColumnCount(tokenizer);
                    // Each token takes two index slots, hence the factor of 2.
                    atom_idTokens = TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
                    atom_nameTokens = TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
                    xTokens = TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
                    yTokens = TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
                    zTokens = TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
                    atom_typeTokens = TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
                    subst_idTokens = TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
                    subst_nameTokens = TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
                    chargeTokens = TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
                    status_bitTokens = TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2);
                    undefFloat = Column.Undefined(molecule.num_atoms, Column.Schema.float);
                    undefInt = Column.Undefined(molecule.num_atoms, Column.Schema.int);
                    undefStr = Column.Undefined(molecule.num_atoms, Column.Schema.str);
                    return [4 /*yield*/, readRecords(state, molecule.num_atoms, columnCount, [
                            atom_idTokens,
                            atom_nameTokens,
                            xTokens,
                            yTokens,
                            zTokens,
                            atom_typeTokens,
                            subst_idTokens,
                            subst_nameTokens,
                            chargeTokens,
                            status_bitTokens
                        ])];
                case 1:
                    _a.sent();
                    ret = {
                        count: molecule.num_atoms,
                        atom_id: TokenColumn(atom_idTokens)(Column.Schema.int),
                        atom_name: TokenColumn(atom_nameTokens)(Column.Schema.str),
                        x: TokenColumn(xTokens)(Column.Schema.float),
                        y: TokenColumn(yTokens)(Column.Schema.float),
                        z: TokenColumn(zTokens)(Column.Schema.float),
                        atom_type: columnCount > 5 ? TokenColumn(atom_typeTokens)(Column.Schema.str) : undefStr,
                        subst_id: columnCount > 6 ? TokenColumn(subst_idTokens)(Column.Schema.int) : undefInt,
                        subst_name: columnCount > 7 ? TokenColumn(subst_nameTokens)(Column.Schema.str) : undefStr,
                        charge: columnCount > 8 ? TokenColumn(chargeTokens)(Column.Schema.float) : undefFloat,
                        status_bit: columnCount > 9 ? TokenColumn(status_bitTokens)(Column.Schema.str) : undefStr,
                    };
                    return [2 /*return*/, ret];
            }
        });
    });
}

/**
 * Reads the '@<TRIPOS>BOND' record: `molecule.num_bonds` rows whose column
 * count is taken from the first row. Id, origin atom, target atom and bond
 * type are always exposed as token columns; status bits fall back to an
 * undefined column when the rows have no fifth value.
 *
 * Only the fifth column feeds `status_bits`; any further columns are skipped
 * so they cannot shift status values onto the wrong bond.
 *
 * @param state parser state positioned at or before the BOND record
 * @returns a promise resolving to the bonds table
 */
function handleBonds(state) {
    return __awaiter(this, void 0, void 0, function () {
        var tokenizer, molecule, columnCount, bond_idTokens, origin_bond_idTokens, target_bond_idTokens, bondTypeTokens, status_bitTokens, ret;
        return __generator(this, function (_a) {
            switch (_a.label) {
                case 0:
                    tokenizer = state.tokenizer, molecule = state.molecule;
                    skipToRecord(tokenizer, '@<TRIPOS>BOND');
                    columnCount = peekColumnCount(tokenizer);
                    bond_idTokens = TokenBuilder.create(tokenizer.data, molecule.num_bonds * 2);
                    origin_bond_idTokens = TokenBuilder.create(tokenizer.data, molecule.num_bonds * 2);
                    target_bond_idTokens = TokenBuilder.create(tokenizer.data, molecule.num_bonds * 2);
                    bondTypeTokens = TokenBuilder.create(tokenizer.data, molecule.num_bonds * 2);
                    status_bitTokens = TokenBuilder.create(tokenizer.data, molecule.num_bonds * 2);
                    return [4 /*yield*/, readRecords(state, molecule.num_bonds, columnCount, [
                            bond_idTokens,
                            origin_bond_idTokens,
                            target_bond_idTokens,
                            bondTypeTokens,
                            status_bitTokens
                        ])];
                case 1:
                    _a.sent();
                    ret = {
                        count: molecule.num_bonds,
                        bond_id: TokenColumn(bond_idTokens)(Column.Schema.int),
                        origin_atom_id: TokenColumn(origin_bond_idTokens)(Column.Schema.int),
                        target_atom_id: TokenColumn(target_bond_idTokens)(Column.Schema.int),
                        bond_type: TokenColumn(bondTypeTokens)(Column.Schema.str),
                        status_bits: columnCount > 4
                            ? TokenColumn(status_bitTokens)(Column.Schema.str)
                            : Column.Undefined(molecule.num_bonds, Column.Schema.str),
                    };
                    return [2 /*return*/, ret];
            }
        });
    });
}

/**
 * Parses every molecule in `data`: for each one reads the MOLECULE, ATOM and
 * BOND records in that order, then skips ahead to the next
 * '@<TRIPOS>MOLECULE' indicator (or the end of the data).
 *
 * @param ctx runtime context used for progress updates
 * @param {string} data full MOL2 text
 * @param {string} name name stored on the result
 * @returns a promise resolving to `Result.success({ name, structures })`
 */
function parseInternal(ctx, data, name) {
    return __awaiter(this, void 0, void 0, function () {
        var tokenizer, structures, state, atoms, bonds, result;
        return __generator(this, function (_a) {
            switch (_a.label) {
                case 0:
                    tokenizer = Tokenizer(data);
                    ctx.update({ message: 'Parsing...', current: 0, max: data.length });
                    structures = [];
                    _a.label = 1;
                case 1:
                    // Loop head: one iteration per molecule until the data is exhausted.
                    if (!(tokenizer.position < data.length)) return [3 /*break*/, 4];
                    state = State(tokenizer, ctx);
                    handleMolecule(state);
                    return [4 /*yield*/, handleAtoms(state)];
                case 2:
                    atoms = _a.sent();
                    return [4 /*yield*/, handleBonds(state)];
                case 3:
                    bonds = _a.sent();
                    structures.push({ molecule: state.molecule, atoms: atoms, bonds: bonds });
                    skipWhitespace(tokenizer);
                    skipToRecord(tokenizer, '@<TRIPOS>MOLECULE');
                    return [3 /*break*/, 1];
                case 4:
                    result = { name: name, structures: structures };
                    return [2 /*return*/, Result.success(result)];
            }
        });
    });
}

/**
 * Creates a task that parses MOL2 text.
 *
 * @param {string} data full MOL2 text
 * @param {string} name name stored on the parsed result
 * @returns a 'Parse MOL2' task that resolves to the value of `parseInternal`
 */
export function parseMol2(data, name) {
    var _this = this;
    return Task.create('Parse MOL2', function (ctx) { return __awaiter(_this, void 0, void 0, function () {
        return __generator(this, function (_a) {
            switch (_a.label) {
                case 0: return [4 /*yield*/, parseInternal(ctx, data, name)];
                case 1: return [2 /*return*/, _a.sent()];
            }
        });
    }); });
}
//# sourceMappingURL=parser.js.map