molstar
Version:
A comprehensive macromolecular library.
322 lines (321 loc) • 18.8 kB
JavaScript
"use strict";
/**
* Copyright (c) 2017-2022 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Zepei Xu <xuzepei19950617@gmail.com>
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.parseMol2 = void 0;
var tslib_1 = require("tslib");
// NOTES
// When you want to create an undefined string column, you must use
// undefStr = UndefinedColumn(molecule.num_atoms, ColumnType.str)
// but not
// const undefPooledStr = UndefinedColumn(molecule.num_atoms, ColumnType.pooledStr);
// because the latter actually returns a column of zeros
var db_1 = require("../../../mol-data/db");
var tokenizer_1 = require("../common/text/tokenizer");
var token_1 = require("../common/text/column/token");
var result_1 = require("../result");
var mol_task_1 = require("../../../mol-task");
var skipWhitespace = tokenizer_1.Tokenizer.skipWhitespace, eatValue = tokenizer_1.Tokenizer.eatValue, markLine = tokenizer_1.Tokenizer.markLine, getTokenString = tokenizer_1.Tokenizer.getTokenString, readLine = tokenizer_1.Tokenizer.readLine;
/**
 * Creates a blank molecule header record.
 *
 * Every count field starts at 0 and every text field starts as the empty
 * string. A new object is built on each call, so callers never share state.
 *
 * @returns {object} record with the keys mol_name, num_atoms, num_bonds,
 *     num_subst, num_feat, num_sets, mol_type, charge_type, status_bits and
 *     mol_comment (in that order)
 */
function createEmptyMolecule() {
    var countKeys = ['num_atoms', 'num_bonds', 'num_subst', 'num_feat', 'num_sets'];
    var textKeys = ['mol_type', 'charge_type', 'status_bits', 'mol_comment'];
    // mol_name comes first so the key order matches the record layout.
    var record = { mol_name: '' };
    countKeys.forEach(function (key) {
        record[key] = 0;
    });
    textKeys.forEach(function (key) {
        record[key] = '';
    });
    return record;
}
/**
 * Bundles everything the section handlers need while parsing one molecule.
 *
 * @param tokenizer tokenizer positioned somewhere in the input
 * @param runtimeCtx task runtime context, stored unchanged
 * @returns {object} `{ tokenizer, molecule, runtimeCtx }` where `molecule` is
 *     a fresh record from `createEmptyMolecule()`
 */
function State(tokenizer, runtimeCtx) {
    var parserState = {};
    parserState.tokenizer = tokenizer;
    parserState.molecule = createEmptyMolecule();
    parserState.runtimeCtx = runtimeCtx;
    return parserState;
}
// Matches one or more whitespace characters; passed to String.prototype.split
// to break a record line into its fields.
var reWhitespace = /\s+/g;
/**
 * Reads the `@<TRIPOS>MOLECULE` record into `state.molecule`.
 *
 * Lines are skipped until the current token is the `@<TRIPOS>MOLECULE`
 * header (or the input ends). The next line is stored as `mol_name` and the
 * line after it is split on whitespace into the five counts. Up to four
 * further lines are stored as `mol_type`, `charge_type`, `status_bits` and
 * `mol_comment`; reading stops early as soon as one of them starts with
 * `@<TRIPOS>`, leaving that header as the tokenizer's current token.
 *
 * Counts are parsed as base-10 integers. A count that is absent from the
 * line or not numeric is stored as 0, the value `createEmptyMolecule()`
 * starts with, not as NaN.
 *
 * @param state parser state; `state.tokenizer` is advanced and
 *     `state.molecule` is filled in place
 */
function handleMolecule(state) {
    var tokenizer = state.tokenizer, molecule = state.molecule;
    while (getTokenString(tokenizer) !== '@<TRIPOS>MOLECULE' && tokenizer.position < tokenizer.data.length) {
        markLine(tokenizer);
    }
    markLine(tokenizer);
    molecule.mol_name = getTokenString(tokenizer);
    markLine(tokenizer);
    var values = getTokenString(tokenizer).trim().split(reWhitespace);
    var countFields = ['num_atoms', 'num_bonds', 'num_subst', 'num_feat', 'num_sets'];
    for (var c = 0; c < countFields.length; c++) {
        molecule[countFields[c]] = parseMoleculeCount(values[c]);
    }
    // The remaining header lines are optional: the first one that is already
    // the next section header ends the record.
    var optionalFields = ['mol_type', 'charge_type', 'status_bits', 'mol_comment'];
    for (var i = 0; i < optionalFields.length; i++) {
        markLine(tokenizer);
        var line = getTokenString(tokenizer);
        if (line.startsWith('@<TRIPOS>')) {
            return;
        }
        molecule[optionalFields[i]] = line;
    }
}
/**
 * Parses one token of the MOLECULE counts line as a base-10 integer.
 *
 * @param token the field text, or undefined when the field is missing
 * @returns {number} the parsed integer, or 0 when parsing yields NaN
 */
function parseMoleculeCount(token) {
    var parsed = parseInt(token, 10);
    return isNaN(parsed) ? 0 : parsed;
}
/**
 * Tokenizes the atom records that follow the `@<TRIPOS>ATOM` header.
 *
 * Lines are skipped until the current token is the header (or the input
 * ends). The first record line is read once to count its whitespace-separated
 * fields, and the tokenizer is then rewound to the start of that line. That
 * field count is consumed for each of the `molecule.num_atoms` records; the
 * first ten fields of a record are stored, any further ones are consumed and
 * dropped. Work is scheduled through `chunkedSubtask` on `state.runtimeCtx`.
 *
 * @param state parser state holding `tokenizer`, `molecule` and `runtimeCtx`
 * @returns promise of the atom table: `count` plus the columns atom_id,
 *     atom_name, x, y, z, atom_type, subst_id, subst_name, charge and
 *     status_bit. The last five are undefined columns when the first record
 *     line does not have enough fields.
 */
function handleAtoms(state) {
    return tslib_1.__awaiter(this, void 0, void 0, function () {
        var tok, mol, savedPosition, savedLineNumber, fieldsPerLine, builders, slot, undefFloat, undefInt, undefStr, totalLength, consumed, types;
        // Builds a typed column over the tokens collected for one field.
        function tokenColumn(builder, type) {
            return (0, token_1.TokenColumnProvider)(builder)(type);
        }
        return tslib_1.__generator(this, function (step) {
            switch (step.label) {
                case 0:
                    tok = state.tokenizer;
                    mol = state.molecule;
                    // skip empty lines and '@<TRIPOS>ATOM'
                    while (getTokenString(tok) !== '@<TRIPOS>ATOM' && tok.position < tok.data.length) {
                        markLine(tok);
                    }
                    // Peek at the first record to learn the field count; the
                    // position is restored below once the builders exist.
                    savedPosition = tok.position;
                    savedLineNumber = tok.lineNumber;
                    fieldsPerLine = readLine(tok).trim().split(/\s+/g).length;
                    // One builder per stored field, indexed by field position:
                    // 0 atom_id, 1 atom_name, 2 x, 3 y, 4 z, 5 atom_type,
                    // 6 subst_id, 7 subst_name, 8 charge, 9 status_bit.
                    builders = [];
                    for (slot = 0; slot < 10; slot++) {
                        builders.push(tokenizer_1.TokenBuilder.create(tok.data, mol.num_atoms * 2));
                    }
                    undefFloat = db_1.Column.Undefined(mol.num_atoms, db_1.Column.Schema.float);
                    undefInt = db_1.Column.Undefined(mol.num_atoms, db_1.Column.Schema.int);
                    undefStr = db_1.Column.Undefined(mol.num_atoms, db_1.Column.Schema.str);
                    tok.position = savedPosition;
                    tok.lineNumber = savedLineNumber;
                    totalLength = tok.length;
                    consumed = 0;
                    return [4 /*yield*/, (0, mol_task_1.chunkedSubtask)(state.runtimeCtx, 100000, void 0, function (chunkSize) {
                            var batch = Math.min(mol.num_atoms - consumed, chunkSize);
                            for (var row = 0; row < batch; row++) {
                                for (var col = 0; col < fieldsPerLine; col++) {
                                    skipWhitespace(tok);
                                    tok.tokenStart = tok.position;
                                    eatValue(tok);
                                    // Fields past the tenth are consumed but not stored.
                                    if (col < builders.length) {
                                        tokenizer_1.TokenBuilder.addUnchecked(builders[col], tok.tokenStart, tok.tokenEnd);
                                    }
                                }
                            }
                            consumed += batch;
                            return batch;
                        }, function (ctx) { return ctx.update({ message: 'Parsing...', current: tok.position, max: totalLength }); })];
                case 1:
                    // sent() rethrows if the awaited subtask was rejected.
                    step.sent();
                    types = db_1.Column.Schema;
                    return [2 /*return*/, {
                            count: mol.num_atoms,
                            atom_id: tokenColumn(builders[0], types.int),
                            atom_name: tokenColumn(builders[1], types.str),
                            x: tokenColumn(builders[2], types.float),
                            y: tokenColumn(builders[3], types.float),
                            z: tokenColumn(builders[4], types.float),
                            atom_type: fieldsPerLine > 5 ? tokenColumn(builders[5], types.str) : undefStr,
                            subst_id: fieldsPerLine > 6 ? tokenColumn(builders[6], types.int) : undefInt,
                            subst_name: fieldsPerLine > 7 ? tokenColumn(builders[7], types.str) : undefStr,
                            charge: fieldsPerLine > 8 ? tokenColumn(builders[8], types.float) : undefFloat,
                            status_bit: fieldsPerLine > 9 ? tokenColumn(builders[9], types.str) : undefStr,
                        }];
            }
        });
    });
}
/**
 * Tokenizes the bond records that follow the `@<TRIPOS>BOND` header.
 *
 * Lines are skipped until the current token is the header (or the input
 * ends). The first record line is read once to count its whitespace-separated
 * fields, and the tokenizer is then rewound to the start of that line. That
 * field count is consumed for each of the `molecule.num_bonds` records.
 *
 * Only the first five fields of a record are stored (bond_id, origin atom,
 * target atom, bond type, status bits). Any further fields are consumed and
 * dropped, exactly as `handleAtoms` does for surplus atom fields. Storing
 * them with the status bits would push more than one token per bond into a
 * builder sized for `num_bonds` tokens and shift that column out of step
 * with the bond rows.
 *
 * @param state parser state holding `tokenizer`, `molecule` and `runtimeCtx`
 * @returns promise of the bond table: `count` plus the columns bond_id,
 *     origin_atom_id, target_atom_id, bond_type and status_bits. The last is
 *     an undefined column when the first record line has fewer than five
 *     fields.
 */
function handleBonds(state) {
    return tslib_1.__awaiter(this, void 0, void 0, function () {
        var tok, mol, savedPosition, savedLineNumber, fieldsPerLine, builders, slot, totalLength, consumed, types;
        // Builds a typed column over the tokens collected for one field.
        function tokenColumn(builder, type) {
            return (0, token_1.TokenColumnProvider)(builder)(type);
        }
        return tslib_1.__generator(this, function (step) {
            switch (step.label) {
                case 0:
                    tok = state.tokenizer;
                    mol = state.molecule;
                    while (getTokenString(tok) !== '@<TRIPOS>BOND' && tok.position < tok.data.length) {
                        markLine(tok);
                    }
                    // Peek at the first record to learn the field count; the
                    // position is restored below once the builders exist.
                    savedPosition = tok.position;
                    savedLineNumber = tok.lineNumber;
                    fieldsPerLine = readLine(tok).trim().split(/\s+/g).length;
                    // One builder per stored field, indexed by field position:
                    // 0 bond_id, 1 origin atom, 2 target atom, 3 bond type,
                    // 4 status bits.
                    builders = [];
                    for (slot = 0; slot < 5; slot++) {
                        builders.push(tokenizer_1.TokenBuilder.create(tok.data, mol.num_bonds * 2));
                    }
                    tok.position = savedPosition;
                    tok.lineNumber = savedLineNumber;
                    totalLength = tok.length;
                    consumed = 0;
                    return [4 /*yield*/, (0, mol_task_1.chunkedSubtask)(state.runtimeCtx, 100000, void 0, function (chunkSize) {
                            var batch = Math.min(mol.num_bonds - consumed, chunkSize);
                            for (var row = 0; row < batch; row++) {
                                for (var col = 0; col < fieldsPerLine; col++) {
                                    skipWhitespace(tok);
                                    tok.tokenStart = tok.position;
                                    eatValue(tok);
                                    // Fields past the fifth are consumed but not
                                    // stored, so each builder receives at most one
                                    // token per bond.
                                    if (col < builders.length) {
                                        tokenizer_1.TokenBuilder.addUnchecked(builders[col], tok.tokenStart, tok.tokenEnd);
                                    }
                                }
                            }
                            consumed += batch;
                            return batch;
                        }, function (ctx) { return ctx.update({ message: 'Parsing...', current: tok.position, max: totalLength }); })];
                case 1:
                    // sent() rethrows if the awaited subtask was rejected.
                    step.sent();
                    types = db_1.Column.Schema;
                    return [2 /*return*/, {
                            count: mol.num_bonds,
                            bond_id: tokenColumn(builders[0], types.int),
                            origin_atom_id: tokenColumn(builders[1], types.int),
                            target_atom_id: tokenColumn(builders[2], types.int),
                            bond_type: tokenColumn(builders[3], types.str),
                            status_bits: fieldsPerLine > 4
                                ? tokenColumn(builders[4], types.str)
                                : db_1.Column.Undefined(mol.num_bonds, types.str),
                        }];
            }
        });
    });
}
/**
 * Looks for an `@<TRIPOS>CRYSIN` record belonging to the current molecule
 * and parses its data line.
 *
 * Lines are scanned from the tokenizer's current token onwards. Reaching an
 * `@<TRIPOS>MOLECULE` header or the end of the input first means there is no
 * CRYSIN record, and nothing is returned. Otherwise the line after the
 * header is split on whitespace into eight fields.
 *
 * @param state parser state; `state.tokenizer` is advanced
 * @returns {object|undefined} `{ a, b, c, alpha, beta, gamma }` parsed with
 *     parseFloat plus `{ spaceGroup, setting }` parsed as base-10 integers,
 *     or undefined when no CRYSIN record was found
 */
function handleCrysin(state) {
    var tokenizer = state.tokenizer;
    var atCrysin = false;
    while (!atCrysin && tokenizer.position < tokenizer.data.length) {
        var current = getTokenString(tokenizer);
        if (current === '@<TRIPOS>MOLECULE') {
            // The next molecule starts here; leave its header as the current token.
            return undefined;
        }
        if (current === '@<TRIPOS>CRYSIN') {
            atCrysin = true;
        }
        else {
            markLine(tokenizer);
        }
    }
    if (tokenizer.position >= tokenizer.data.length) {
        return undefined;
    }
    markLine(tokenizer);
    var fields = getTokenString(tokenizer).trim().split(reWhitespace);
    var floatKeys = ['a', 'b', 'c', 'alpha', 'beta', 'gamma'];
    var cell = {};
    for (var i = 0; i < floatKeys.length; i++) {
        cell[floatKeys[i]] = parseFloat(fields[i]);
    }
    cell.spaceGroup = parseInt(fields[6], 10);
    cell.setting = parseInt(fields[7], 10);
    return cell;
}
/**
 * Parses every molecule found in `data` and wraps the outcome in a
 * successful reader result.
 *
 * One tokenizer is created over `data` and shared by all molecules. For each
 * molecule a fresh State is created and the MOLECULE, ATOM, BOND and
 * (optional) CRYSIN handlers run in that order; the atom and bond handlers
 * are awaited. The body below is a compiler-generated state machine: each
 * `case` is a resume point and `_a.sent()` yields the awaited value.
 *
 * @param ctx runtime context; receives a progress update and is stored on each State
 * @param data input to tokenize; its `length` is reported as the progress maximum
 * @param name stored unchanged on the result
 * @returns promise of `ReaderResult.success({ name, structures })`, where each
 *     entry of `structures` is `{ molecule, atoms, bonds, crysin }`
 */
function parseInternal(ctx, data, name) {
return tslib_1.__awaiter(this, void 0, void 0, function () {
var tokenizer, structures, state, atoms, bonds, crysin, result;
return tslib_1.__generator(this, function (_a) {
switch (_a.label) {
case 0:
// Set-up: runs once, then falls through into the loop head.
tokenizer = (0, tokenizer_1.Tokenizer)(data);
ctx.update({ message: 'Parsing...', current: 0, max: data.length });
structures = [];
_a.label = 1;
case 1:
// Loop head: leave the loop (label 4) once the whole input has been consumed.
if (!(tokenizer.position < data.length)) return [3 /*break*/, 4];
// Each molecule gets its own State (and so its own molecule record).
state = State(tokenizer, ctx);
handleMolecule(state);
return [4 /*yield*/, handleAtoms(state)];
case 2:
atoms = _a.sent();
return [4 /*yield*/, handleBonds(state)];
case 3:
bonds = _a.sent();
// crysin is undefined when handleCrysin finds no CRYSIN record.
crysin = handleCrysin(state);
structures.push({ molecule: state.molecule, atoms: atoms, bonds: bonds, crysin: crysin });
// Advance to the next MOLECULE header (or the end of the input) before looping.
skipWhitespace(tokenizer);
while (getTokenString(tokenizer) !== '@<TRIPOS>MOLECULE' && tokenizer.position < tokenizer.data.length) {
markLine(tokenizer);
}
// Jump back to the loop head.
return [3 /*break*/, 1];
case 4:
result = { name: name, structures: structures };
return [2 /*return*/, result_1.ReaderResult.success(result)];
}
});
});
}
/**
 * Creates the task that parses MOL2 input.
 *
 * Nothing is parsed when this function is called: it only builds a task
 * named 'Parse MOL2' whose body awaits `parseInternal(ctx, data, name)` and
 * resolves with its result.
 *
 * @param data input handed to `parseInternal` unchanged
 * @param name label handed to `parseInternal` unchanged
 * @returns the value returned by `Task.create`
 */
function parseMol2(data, name) {
    var self = this;
    // Task body: awaits the parser and forwards whatever it produced.
    function run(ctx) {
        return tslib_1.__awaiter(self, void 0, void 0, function () {
            return tslib_1.__generator(this, function (step) {
                if (step.label === 0) {
                    return [4 /*yield*/, parseInternal(ctx, data, name)];
                }
                if (step.label === 1) {
                    // sent() returns the awaited result or rethrows its rejection.
                    return [2 /*return*/, step.sent()];
                }
            });
        });
    }
    return mol_task_1.Task.create('Parse MOL2', run);
}
exports.parseMol2 = parseMol2;