molstar
Version:
A comprehensive macromolecular library.
288 lines • 16.8 kB
JavaScript
/**
* Copyright (c) 2017-2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Zepei Xu <xuzepei19950617@gmail.com>
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
import { __awaiter, __generator } from "tslib";
// NOTES
// When you want to create an undefined string column, you must use
// undefStr = UndefinedColumn(molecule.num_atoms, ColumnType.str)
// and not
// const undefPooledStr = UndefinedColumn(molecule.num_atoms, ColumnType.pooledStr);
// because the latter actually returns a column of zeros
import { Column } from '../../../mol-data/db';
import { TokenBuilder, Tokenizer } from '../common/text/tokenizer';
import { TokenColumnProvider as TokenColumn } from '../common/text/column/token';
import { ReaderResult as Result } from '../result';
import { Task, chunkedSubtask } from '../../../mol-task';
// Local aliases for the Tokenizer helpers used by the parsing functions in this file.
var skipWhitespace = Tokenizer.skipWhitespace;
var eatValue = Tokenizer.eatValue;
var markLine = Tokenizer.markLine;
var getTokenString = Tokenizer.getTokenString;
var readLine = Tokenizer.readLine;
/**
 * Builds a blank molecule header record.
 *
 * Every `num_*` count starts at 0 and every text field starts as the empty
 * string. A new object is returned on each call.
 *
 * @returns a fresh molecule header object
 */
function createEmptyMolecule() {
    var blank = { mol_name: '' };
    ['num_atoms', 'num_bonds', 'num_subst', 'num_feat', 'num_sets'].forEach(function (key) {
        blank[key] = 0;
    });
    ['mol_type', 'charge_type', 'status_bits', 'mol_comment'].forEach(function (key) {
        blank[key] = '';
    });
    return blank;
}
/**
 * Bundles everything the section handlers need for one molecule record.
 *
 * @param tokenizer  tokenizer positioned somewhere in the input
 * @param runtimeCtx runtime context stored on the state as `runtimeCtx`
 * @returns an object with `tokenizer`, a blank `molecule` header and `runtimeCtx`
 */
function State(tokenizer, runtimeCtx) {
    var parserState = {};
    parserState.tokenizer = tokenizer;
    parserState.molecule = createEmptyMolecule();
    parserState.runtimeCtx = runtimeCtx;
    return parserState;
}
var reWhitespace = /\s+/g;
/**
 * Reads the `@<TRIPOS>MOLECULE` record into `state.molecule`.
 *
 * Layout consumed, one line each: section header, molecule name, counts line
 * (`num_atoms num_bonds num_subst num_feat num_sets`), then up to four optional
 * lines (`mol_type`, `charge_type`, `status_bits`, `mol_comment`). Reading of the
 * optional lines stops as soon as a line starts with `@<TRIPOS>`; that line is
 * left as the tokenizer's current token so the next handler can recognise it.
 *
 * Counts are parsed base-10. A count that is missing from the counts line (the
 * MOL2 format allows trailing counts to be omitted) or is not numeric is stored
 * as 0 rather than NaN.
 *
 * @param state parser state; `state.tokenizer` is advanced and `state.molecule` is filled in place
 */
function handleMolecule(state) {
    var tokenizer = state.tokenizer, molecule = state.molecule;
    // Advance line by line until the current token is the section header or the input ends.
    while (getTokenString(tokenizer) !== '@<TRIPOS>MOLECULE' && tokenizer.position < tokenizer.data.length) {
        markLine(tokenizer);
    }
    markLine(tokenizer);
    molecule.mol_name = getTokenString(tokenizer);
    markLine(tokenizer);
    var values = getTokenString(tokenizer).trim().split(reWhitespace);
    var countFields = ['num_atoms', 'num_bonds', 'num_subst', 'num_feat', 'num_sets'];
    for (var i = 0; i < countFields.length; i++) {
        var count = parseInt(values[i], 10);
        // parseInt(undefined) and parseInt('') are NaN; fall back to an empty section.
        molecule[countFields[i]] = Number.isNaN(count) ? 0 : count;
    }
    var optionalFields = ['mol_type', 'charge_type', 'status_bits', 'mol_comment'];
    for (var k = 0; k < optionalFields.length; k++) {
        markLine(tokenizer);
        var line = getTokenString(tokenizer);
        // The next section has started; the remaining optional fields keep their defaults.
        if (line.startsWith('@<TRIPOS>'))
            return;
        molecule[optionalFields[k]] = line;
    }
}
/**
 * Tokenizes the rows of the `@<TRIPOS>ATOM` section into one token list per column.
 *
 * The column count is taken from the first row after the section header and is
 * applied to all `state.molecule.num_atoms` rows. Values are read token by
 * token (whitespace separated), not line by line. Columns past the tenth are
 * consumed but not stored. Columns 6-10 that the first row does not have are
 * returned as undefined columns.
 *
 * @param state parser state holding `tokenizer`, `molecule` and `runtimeCtx`
 * @returns promise resolving to `{ count, atom_id, atom_name, x, y, z, atom_type,
 *          subst_id, subst_name, charge, status_bit }`
 */
function handleAtoms(state) {
    return __awaiter(this, void 0, void 0, function () {
        // Declared outside the generator body so the values survive the yield below.
        var tokenizer, molecule, startPosition, startLineNumber, columnCount, builders, missingFloat, missingInt, missingStr, totalLength, rowsDone;
        return __generator(this, function (_a) {
            switch (_a.label) {
                case 0:
                    tokenizer = state.tokenizer;
                    molecule = state.molecule;
                    // skip empty lines and '@<TRIPOS>ATOM'
                    while (getTokenString(tokenizer) !== '@<TRIPOS>ATOM' && tokenizer.position < tokenizer.data.length) {
                        markLine(tokenizer);
                    }
                    startPosition = tokenizer.position;
                    startLineNumber = tokenizer.lineNumber;
                    // Peek at the first row only to count its columns; the tokenizer is rewound below.
                    columnCount = readLine(tokenizer).trim().split(/\s+/g).length;
                    // One token list per known column, in file order:
                    // atom_id, atom_name, x, y, z, atom_type, subst_id, subst_name, charge, status_bit
                    builders = [];
                    while (builders.length < 10) {
                        builders.push(TokenBuilder.create(tokenizer.data, molecule.num_atoms * 2));
                    }
                    missingFloat = Column.Undefined(molecule.num_atoms, Column.Schema.float);
                    missingInt = Column.Undefined(molecule.num_atoms, Column.Schema.int);
                    missingStr = Column.Undefined(molecule.num_atoms, Column.Schema.str);
                    tokenizer.position = startPosition;
                    tokenizer.lineNumber = startLineNumber;
                    totalLength = tokenizer.length;
                    rowsDone = 0;
                    return [4 /*yield*/, chunkedSubtask(state.runtimeCtx, 100000, void 0, function (chunkSize) {
                            var rows = Math.min(molecule.num_atoms - rowsDone, chunkSize);
                            for (var row = 0; row < rows; row++) {
                                for (var col = 0; col < columnCount; col++) {
                                    skipWhitespace(tokenizer);
                                    tokenizer.tokenStart = tokenizer.position;
                                    eatValue(tokenizer);
                                    // Columns without a builder are still consumed, just not recorded.
                                    if (col < builders.length) {
                                        TokenBuilder.addUnchecked(builders[col], tokenizer.tokenStart, tokenizer.tokenEnd);
                                    }
                                }
                            }
                            rowsDone += rows;
                            return rows;
                        }, function (ctx) { return ctx.update({ message: 'Parsing...', current: tokenizer.position, max: totalLength }); })];
                case 1:
                    _a.sent();
                    return [2 /*return*/, {
                            count: molecule.num_atoms,
                            atom_id: TokenColumn(builders[0])(Column.Schema.int),
                            atom_name: TokenColumn(builders[1])(Column.Schema.str),
                            x: TokenColumn(builders[2])(Column.Schema.float),
                            y: TokenColumn(builders[3])(Column.Schema.float),
                            z: TokenColumn(builders[4])(Column.Schema.float),
                            atom_type: columnCount > 5 ? TokenColumn(builders[5])(Column.Schema.str) : missingStr,
                            subst_id: columnCount > 6 ? TokenColumn(builders[6])(Column.Schema.int) : missingInt,
                            subst_name: columnCount > 7 ? TokenColumn(builders[7])(Column.Schema.str) : missingStr,
                            charge: columnCount > 8 ? TokenColumn(builders[8])(Column.Schema.float) : missingFloat,
                            status_bit: columnCount > 9 ? TokenColumn(builders[9])(Column.Schema.str) : missingStr,
                        }];
            }
        });
    });
}
/**
 * Tokenizes the rows of the `@<TRIPOS>BOND` section into one token list per column.
 *
 * The column count is taken from the first row after the section header and is
 * applied to all `state.molecule.num_bonds` rows. Values are read token by
 * token (whitespace separated), not line by line. Only the first five columns
 * are stored (`bond_id`, `origin_atom_id`, `target_atom_id`, `bond_type`,
 * `status_bits`); any further columns are consumed and ignored so that each
 * stored column holds exactly one token per row. When the first row has fewer
 * than five columns, `status_bits` is an undefined string column.
 *
 * @param state parser state holding `tokenizer`, `molecule` and `runtimeCtx`
 * @returns promise resolving to `{ count, bond_id, origin_atom_id, target_atom_id,
 *          bond_type, status_bits }`
 */
function handleBonds(state) {
    return __awaiter(this, void 0, void 0, function () {
        // Declared outside the generator body so the values survive the yield below.
        var tokenizer, molecule, initialTokenizerPosition, initialTokenizerLineNumber, firstLine, firstLineArray, columnCount, bond_idTokens, origin_bond_idTokens, target_bond_idTokens, bondTypeTokens, status_bitTokens, length, linesAlreadyRead, ret;
        return __generator(this, function (_a) {
            switch (_a.label) {
                case 0:
                    tokenizer = state.tokenizer, molecule = state.molecule;
                    // Advance line by line until the current token is the section header or the input ends.
                    while (getTokenString(tokenizer) !== '@<TRIPOS>BOND' && tokenizer.position < tokenizer.data.length) {
                        markLine(tokenizer);
                    }
                    initialTokenizerPosition = tokenizer.position;
                    initialTokenizerLineNumber = tokenizer.lineNumber;
                    // Peek at the first row only to count its columns; the tokenizer is rewound below.
                    firstLine = readLine(tokenizer);
                    firstLineArray = firstLine.trim().split(/\s+/g);
                    columnCount = firstLineArray.length;
                    bond_idTokens = TokenBuilder.create(tokenizer.data, molecule.num_bonds * 2);
                    origin_bond_idTokens = TokenBuilder.create(tokenizer.data, molecule.num_bonds * 2);
                    target_bond_idTokens = TokenBuilder.create(tokenizer.data, molecule.num_bonds * 2);
                    bondTypeTokens = TokenBuilder.create(tokenizer.data, molecule.num_bonds * 2);
                    status_bitTokens = TokenBuilder.create(tokenizer.data, molecule.num_bonds * 2);
                    tokenizer.position = initialTokenizerPosition;
                    tokenizer.lineNumber = initialTokenizerLineNumber;
                    length = tokenizer.length;
                    linesAlreadyRead = 0;
                    return [4 /*yield*/, chunkedSubtask(state.runtimeCtx, 100000, void 0, function (chunkSize) {
                            var linesToRead = Math.min(molecule.num_bonds - linesAlreadyRead, chunkSize);
                            for (var i = 0; i < linesToRead; i++) {
                                for (var j = 0; j < columnCount; j++) {
                                    skipWhitespace(tokenizer);
                                    tokenizer.tokenStart = tokenizer.position;
                                    eatValue(tokenizer);
                                    switch (j) {
                                        case 0:
                                            TokenBuilder.addUnchecked(bond_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                                            break;
                                        case 1:
                                            TokenBuilder.addUnchecked(origin_bond_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                                            break;
                                        case 2:
                                            TokenBuilder.addUnchecked(target_bond_idTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                                            break;
                                        case 3:
                                            TokenBuilder.addUnchecked(bondTypeTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                                            break;
                                        // Only the fifth column is the status bits. A catch-all here would
                                        // add several tokens per row when a row has extra columns, which
                                        // shifts every later row of the status_bits column.
                                        case 4:
                                            TokenBuilder.addUnchecked(status_bitTokens, tokenizer.tokenStart, tokenizer.tokenEnd);
                                            break;
                                    }
                                }
                            }
                            linesAlreadyRead += linesToRead;
                            return linesToRead;
                        }, function (ctx) { return ctx.update({ message: 'Parsing...', current: tokenizer.position, max: length }); })];
                case 1:
                    _a.sent();
                    ret = {
                        count: molecule.num_bonds,
                        bond_id: TokenColumn(bond_idTokens)(Column.Schema.int),
                        origin_atom_id: TokenColumn(origin_bond_idTokens)(Column.Schema.int),
                        target_atom_id: TokenColumn(target_bond_idTokens)(Column.Schema.int),
                        bond_type: TokenColumn(bondTypeTokens)(Column.Schema.str),
                        status_bits: columnCount > 4
                            ? TokenColumn(status_bitTokens)(Column.Schema.str)
                            : Column.Undefined(molecule.num_bonds, Column.Schema.str),
                    };
                    return [2 /*return*/, ret];
            }
        });
    });
}
/**
 * Parses every molecule record found in `data`.
 *
 * For each record the MOLECULE, ATOM and BOND handlers are run in that order on
 * a fresh state that shares one tokenizer; the tokenizer is then advanced to the
 * next `@<TRIPOS>MOLECULE` header (or to the end of the input).
 *
 * @param ctx  runtime context; used for progress updates and passed to the handlers via the state
 * @param data text to parse
 * @param name stored as `name` on the result
 * @returns promise of `Result.success({ name, structures })`, where each structure is
 *          `{ molecule, atoms, bonds }`
 */
function parseInternal(ctx, data, name) {
return __awaiter(this, void 0, void 0, function () {
// Declared outside the generator body so the values survive the yields below.
var tokenizer, structures, state, atoms, bonds, result;
return __generator(this, function (_a) {
switch (_a.label) {
case 0:
tokenizer = Tokenizer(data);
ctx.update({ message: 'Parsing...', current: 0, max: data.length });
structures = [];
_a.label = 1;
// Label 1 is the loop head: one iteration per molecule record.
case 1:
if (!(tokenizer.position < data.length)) return [3 /*break*/, 4];
state = State(tokenizer, ctx);
handleMolecule(state);
return [4 /*yield*/, handleAtoms(state)];
case 2:
atoms = _a.sent();
return [4 /*yield*/, handleBonds(state)];
case 3:
bonds = _a.sent();
structures.push({ molecule: state.molecule, atoms: atoms, bonds: bonds });
skipWhitespace(tokenizer);
// Skip any remaining lines of this record until the next molecule header or the end of the input.
while (getTokenString(tokenizer) !== '@<TRIPOS>MOLECULE' && tokenizer.position < tokenizer.data.length) {
markLine(tokenizer);
}
// Jump back to the loop head.
return [3 /*break*/, 1];
case 4:
result = { name: name, structures: structures };
return [2 /*return*/, Result.success(result)];
}
});
});
}
/**
 * Creates the 'Parse MOL2' task.
 *
 * Nothing is parsed until the task body is run; the body then resolves to
 * whatever `parseInternal(ctx, data, name)` resolves to.
 *
 * @param data MOL2 text to parse
 * @param name name passed through to `parseInternal`
 * @returns the object returned by `Task.create`
 */
export function parseMol2(data, name) {
    var outerThis = this;
    function run(ctx) {
        return __awaiter(outerThis, void 0, void 0, function () {
            return __generator(this, function (step) {
                if (step.label === 0) {
                    return [4 /*yield*/, parseInternal(ctx, data, name)];
                }
                if (step.label === 1) {
                    return [2 /*return*/, step.sent()];
                }
            });
        });
    }
    return Task.create('Parse MOL2', run);
}
//# sourceMappingURL=parser.js.map