// molstar — a comprehensive macromolecular library.
/**
* Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
import { Task, chunkedSubtask } from '../../../mol-task';
import { Tokenizer, TokenBuilder } from '../common/text/tokenizer';
import { ReaderResult as Result } from '../result';
import { TokenColumnProvider as TokenColumn } from '../common/text/column/token';
import { Column } from '../../../mol-data/db';
// Local aliases for the tokenizer helpers used throughout this parser.
const { readLine, skipWhitespace, eatValue, eatLine, markStart } = Tokenizer;
// Splits a line into whitespace-separated fields.
const reWhitespace = /\s+/;
// Used with String#replace (non-global) to strip a leading '*' or 'REMARK'
// marker from PSF title lines; matches (possibly empty) only at the start.
const reTitle = /(^\*|REMARK)*/;
/**
 * Bundle the tokenizer and runtime context into the parser state object
 * threaded through the section handlers.
 * @param tokenizer Tokenizer over the raw PSF text
 * @param runtimeCtx runtime context used for progress updates
 * @returns {{ tokenizer, runtimeCtx }}
 */
function State(tokenizer, runtimeCtx) {
    const state = { tokenizer, runtimeCtx };
    return state;
}
/**
 * Tokenize the atoms section of a PSF file into per-field token lists.
 *
 * Two layouts are supported, detected by counting the fields of the next
 * line WITHOUT consuming it:
 *  - LAMMPS "full" (exactly 7 fields): AtomID ResID AtomName AtomType Charge Mass Unused0
 *  - standard PSF (8 fields read):     AtomID SegName ResID ResName AtomName AtomType Charge Mass
 * Any columns beyond the expected count are skipped via eatLine.
 *
 * @param state parser state ({ tokenizer, runtimeCtx })
 * @param count number of atom records to read (from the !NATOM header)
 * @returns column accessors over the tokenized fields
 */
async function handleAtoms(state, count) {
    const { tokenizer } = state;
    // One token builder per field. `count * 2` is the size hint passed to
    // TokenBuilder.create — presumably start/end offsets per token; TODO confirm.
    const atomId = TokenBuilder.create(tokenizer.data, count * 2);
    const segmentName = TokenBuilder.create(tokenizer.data, count * 2);
    const residueId = TokenBuilder.create(tokenizer.data, count * 2);
    const residueName = TokenBuilder.create(tokenizer.data, count * 2);
    const atomName = TokenBuilder.create(tokenizer.data, count * 2);
    const atomType = TokenBuilder.create(tokenizer.data, count * 2);
    const charge = TokenBuilder.create(tokenizer.data, count * 2);
    const mass = TokenBuilder.create(tokenizer.data, count * 2);
    // Peek at the first atom line to detect the column layout, then rewind the
    // tokenizer so the same line is re-read as data in the loop below.
    const { position } = tokenizer;
    const line = readLine(tokenizer).trim();
    tokenizer.position = position;
    // LAMMPS full
    // AtomID ResID AtomName AtomType Charge Mass Unused0
    const isLammpsFull = line.split(reWhitespace).length === 7;
    // Number of fields actually tokenized per line for each layout.
    const n = isLammpsFull ? 6 : 8;
    // Total input length, used only for progress reporting below.
    const { length } = tokenizer;
    let linesAlreadyRead = 0;
    // Read lines in chunks so the runtime context can report progress.
    await chunkedSubtask(state.runtimeCtx, 100000, void 0, chunkSize => {
        const linesToRead = Math.min(count - linesAlreadyRead, chunkSize);
        for (let i = 0; i < linesToRead; ++i) {
            // Tokenize the first `n` whitespace-separated fields of the line,
            // routing each field to its builder by column index.
            for (let j = 0; j < n; ++j) {
                skipWhitespace(tokenizer);
                markStart(tokenizer);
                eatValue(tokenizer);
                if (isLammpsFull) {
                    switch (j) {
                        case 0:
                            TokenBuilder.addUnchecked(atomId, tokenizer.tokenStart, tokenizer.tokenEnd);
                            break;
                        case 1:
                            TokenBuilder.addUnchecked(residueId, tokenizer.tokenStart, tokenizer.tokenEnd);
                            break;
                        case 2:
                            TokenBuilder.addUnchecked(atomName, tokenizer.tokenStart, tokenizer.tokenEnd);
                            break;
                        case 3:
                            TokenBuilder.addUnchecked(atomType, tokenizer.tokenStart, tokenizer.tokenEnd);
                            break;
                        case 4:
                            TokenBuilder.addUnchecked(charge, tokenizer.tokenStart, tokenizer.tokenEnd);
                            break;
                        case 5:
                            TokenBuilder.addUnchecked(mass, tokenizer.tokenStart, tokenizer.tokenEnd);
                            break;
                    }
                }
                else {
                    switch (j) {
                        case 0:
                            TokenBuilder.addUnchecked(atomId, tokenizer.tokenStart, tokenizer.tokenEnd);
                            break;
                        case 1:
                            TokenBuilder.addUnchecked(segmentName, tokenizer.tokenStart, tokenizer.tokenEnd);
                            break;
                        case 2:
                            TokenBuilder.addUnchecked(residueId, tokenizer.tokenStart, tokenizer.tokenEnd);
                            break;
                        case 3:
                            TokenBuilder.addUnchecked(residueName, tokenizer.tokenStart, tokenizer.tokenEnd);
                            break;
                        case 4:
                            TokenBuilder.addUnchecked(atomName, tokenizer.tokenStart, tokenizer.tokenEnd);
                            break;
                        case 5:
                            TokenBuilder.addUnchecked(atomType, tokenizer.tokenStart, tokenizer.tokenEnd);
                            break;
                        case 6:
                            TokenBuilder.addUnchecked(charge, tokenizer.tokenStart, tokenizer.tokenEnd);
                            break;
                        case 7:
                            TokenBuilder.addUnchecked(mass, tokenizer.tokenStart, tokenizer.tokenEnd);
                            break;
                    }
                }
            }
            // ignore any extra columns
            eatLine(tokenizer);
            markStart(tokenizer);
        }
        linesAlreadyRead += linesToRead;
        return linesToRead;
    }, ctx => ctx.update({ message: 'Parsing...', current: tokenizer.position, max: length }));
    // NOTE(review): in the LAMMPS branch, segmentName and residueName reuse the
    // residueId tokens — presumably a deliberate fallback since those columns do
    // not exist in the LAMMPS full layout; confirm against upstream intent.
    return {
        count,
        atomId: TokenColumn(atomId)(Column.Schema.int),
        segmentName: isLammpsFull
            ? TokenColumn(residueId)(Column.Schema.str)
            : TokenColumn(segmentName)(Column.Schema.str),
        residueId: TokenColumn(residueId)(Column.Schema.int),
        residueName: isLammpsFull
            ? TokenColumn(residueId)(Column.Schema.str)
            : TokenColumn(residueName)(Column.Schema.str),
        atomName: TokenColumn(atomName)(Column.Schema.str),
        atomType: TokenColumn(atomType)(Column.Schema.str),
        charge: TokenColumn(charge)(Column.Schema.float),
        mass: TokenColumn(mass)(Column.Schema.float)
    };
}
/**
 * Tokenize the bonds section: `count` pairs of atom ids, whitespace-separated.
 *
 * @param state parser state ({ tokenizer, runtimeCtx })
 * @param count number of bonds to read (from the !NBOND header)
 * @returns { count, atomIdA, atomIdB } with int column accessors for each side
 */
async function handleBonds(state, count) {
    const { tokenizer } = state;
    const atomIdA = TokenBuilder.create(tokenizer.data, count * 2);
    const atomIdB = TokenBuilder.create(tokenizer.data, count * 2);
    // Total input length, used only for progress reporting.
    const { length } = tokenizer;
    let bondsRead = 0;
    await chunkedSubtask(state.runtimeCtx, 10, void 0, chunkSize => {
        const toRead = Math.min(count - bondsRead, chunkSize);
        for (let i = 0; i < toRead; ++i) {
            // First token of each pair goes to atomIdA, the second to atomIdB.
            for (const target of [atomIdA, atomIdB]) {
                skipWhitespace(tokenizer);
                markStart(tokenizer);
                eatValue(tokenizer);
                TokenBuilder.addUnchecked(target, tokenizer.tokenStart, tokenizer.tokenEnd);
            }
        }
        bondsRead += toRead;
        return toRead;
    }, ctx => ctx.update({ message: 'Parsing...', current: tokenizer.position, max: length }));
    return {
        count,
        atomIdA: TokenColumn(atomIdA)(Column.Schema.int),
        atomIdB: TokenColumn(atomIdB)(Column.Schema.int),
    };
}
/**
 * Read `count` title lines, stripping the leading '*'/'REMARK' marker and
 * surrounding whitespace from each.
 *
 * @param state parser state ({ tokenizer, runtimeCtx })
 * @param count number of title lines (from the !NTITLE header)
 * @returns {string[]} cleaned title lines
 */
function parseTitle(state, count) {
    return Array.from({ length: count }, () => {
        const raw = readLine(state.tokenizer);
        return raw.replace(reTitle, '').trim();
    });
}
/**
 * Parse PSF-formatted text: reads the identifying first line, then scans for
 * section headers (each of the form '<count> !<SECTION>') and dispatches to
 * the section handlers. Parsing stops after !NBOND; the remaining sections
 * are not yet implemented.
 *
 * Fix: pass an explicit radix of 10 to every parseInt call — without it,
 * parsing is implementation/input dependent for strings with leading zeros
 * or '0x' prefixes.
 *
 * @param data full text of the PSF file
 * @param ctx runtime context used for progress reporting
 * @returns Result.success({ id, title, atoms, bonds }), or Result.error if
 *          the atoms or bonds section is missing
 */
async function parseInternal(data, ctx) {
    const tokenizer = Tokenizer(data);
    const state = State(tokenizer, ctx);
    let title = undefined;
    let atoms = undefined;
    let bonds = undefined;
    // The first line identifies the file (e.g. 'PSF' plus format flags).
    const id = readLine(state.tokenizer).trim();
    while (tokenizer.tokenEnd < tokenizer.length) {
        const line = readLine(state.tokenizer).trim();
        if (line.includes('!NTITLE')) {
            // Section headers lead with the record count, e.g. '2 !NTITLE'.
            const numTitle = parseInt(line.split(reWhitespace)[0], 10);
            title = parseTitle(state, numTitle);
        }
        else if (line.includes('!NATOM')) {
            const numAtoms = parseInt(line.split(reWhitespace)[0], 10);
            atoms = await handleAtoms(state, numAtoms);
        }
        else if (line.includes('!NBOND')) {
            const numBonds = parseInt(line.split(reWhitespace)[0], 10);
            bonds = await handleBonds(state, numBonds);
            break; // TODO: don't break when the below are implemented
        }
        else if (line.includes('!NTHETA')) {
            // TODO
        }
        else if (line.includes('!NPHI')) {
            // TODO
        }
        else if (line.includes('!NIMPHI')) {
            // TODO
        }
        else if (line.includes('!NDON')) {
            // TODO
        }
        else if (line.includes('!NACC')) {
            // TODO
        }
        else if (line.includes('!NNB')) {
            // TODO
        }
        else if (line.includes('!NGRP NST2')) {
            // TODO
        }
        else if (line.includes('!MOLNT')) {
            // TODO
        }
        else if (line.includes('!NUMLP NUMLPH')) {
            // TODO
        }
        else if (line.includes('!NCRTERM')) {
            // TODO
        }
    }
    // A missing title section is tolerated; missing atoms/bonds are errors.
    if (title === undefined) {
        title = [];
    }
    if (atoms === undefined) {
        return Result.error('no atoms data');
    }
    if (bonds === undefined) {
        return Result.error('no bonds data');
    }
    const result = {
        id,
        title,
        atoms,
        bonds
    };
    return Result.success(result);
}
/**
 * Create a Task that parses PSF text into { id, title, atoms, bonds }.
 *
 * @param data raw PSF file contents
 * @returns a Task yielding a ReaderResult
 */
export function parsePsf(data) {
    // parseInternal already returns a promise; no extra async wrapper needed.
    return Task.create('Parse PSF', (ctx) => parseInternal(data, ctx));
}