molstar
Version:
A comprehensive macromolecular library.
268 lines (267 loc) • 10.6 kB
JavaScript
/**
* Copyright (c) 2019-2024 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
import { CifField } from '../../../mol-io/reader/cif';
import { TokenBuilder, Tokenizer } from '../../../mol-io/reader/common/text/tokenizer';
import { guessElementSymbolTokens } from '../util';
import { Column } from '../../../mol-data/db';
import { areTokensEmpty } from '../../../mol-io/reader/common/text/column/token';
/**
 * Creates an empty set of per-atom column buffers for the PDB atom parser.
 *
 * Token-backed columns are created with `TokenBuilder.create(data, 2 * count)`;
 * the remaining columns (`id`, `pdbx_PDB_model_num`, `label_entity_id`) are
 * plain arrays. `index` starts at 0.
 *
 * @param data passed through unchanged as the first argument of `TokenBuilder.create`
 * @param count number used to size the token builders (`2 * count` is requested)
 * @returns a fresh template object; every column is a distinct instance
 */
export function getAtomSiteTemplate(data, count) {
    const tokenCapacity = 2 * count;
    // [column name, whether the column is token-backed]; order defines the key order
    // of the returned object.
    const layout = [
        ['group_PDB', true],
        ['id', false],
        ['auth_atom_id', true],
        ['label_alt_id', true],
        ['auth_comp_id', true],
        ['auth_asym_id', true],
        ['auth_seq_id', true],
        ['pdbx_PDB_ins_code', true],
        ['Cartn_x', true],
        ['Cartn_y', true],
        ['Cartn_z', true],
        ['occupancy', true],
        ['B_iso_or_equiv', true],
        ['type_symbol', true],
        ['pdbx_PDB_model_num', false],
        ['label_entity_id', false],
        ['partial_charge', true],
    ];
    const template = { index: 0 };
    for (const [column, isTokenColumn] of layout) {
        template[column] = isTokenColumn
            ? TokenBuilder.create(data, tokenCapacity)
            : [];
    }
    return template;
}
/**
 * Derives a `label_asym_id` for each atom row from its chain id.
 *
 * Rows are expected to be queried in file order through `get(i)`. Within one
 * model, a chain id that shows up again at a row listed in `terIndices` gets a
 * numeric suffix (`A` -> `A_1`, `A_2`, ...); without such a marker the repeated
 * id is returned as is. When `hasAssemblies` is set the original chain id is
 * always returned unchanged.
 */
export class LabelAsymIdHelper {
    /**
     * @param asymIds object exposing `value(i)` that returns the chain id of row `i`
     * @param modelNums per-row model identifiers, indexed by row
     * @param terIndices collection exposing `has(i)`; rows for which a repeated chain id is renamed
     * @param hasAssemblies when truthy, `get` returns the unmodified chain id
     */
    constructor(asymIds, modelNums, terIndices, hasAssemblies) {
        this.asymIds = asymIds;
        this.modelNums = modelNums;
        this.terIndices = terIndices;
        this.hasAssemblies = hasAssemblies;
        this.asymIdCounts = new Map();
        this.currModelNum = undefined;
        this.currAsymId = '';
        this.currLabelAsymId = '';
    }
    /** Resets all per-pass state so the rows can be walked again from the start. */
    clear() {
        this.asymIdCounts.clear();
        this.currModelNum = undefined;
        // also drop the chain tracking, otherwise a stale chain id from the
        // previous pass could be mistaken for the current one
        this.currAsymId = '';
        this.currLabelAsymId = '';
    }
    /**
     * @param i row index
     * @returns the label chain id for row `i`
     */
    get(i) {
        const asymId = this.asymIds.value(i);
        if (this.hasAssemblies)
            return asymId;
        const modelNum = this.modelNums[i];
        if (modelNum !== this.currModelNum) {
            this.asymIdCounts.clear();
            this.currModelNum = modelNum;
            // track the chain of the first row of the model as well; without this
            // a later row whose chain equals the last chain of the previous model
            // would not be recognized as a chain change and would inherit the
            // label of the first chain of this model
            this.currAsymId = asymId;
            this.currLabelAsymId = asymId;
        }
        else if (this.currAsymId !== asymId) {
            this.currAsymId = asymId;
            this.currLabelAsymId = asymId;
        }
        if (this.asymIdCounts.has(asymId)) {
            // only change the chains name if there are TER records
            // otherwise assume repeated chain name use is from interleaved chains
            // also don't change the chains name if there are assemblies
            // as those require the original chain name
            if (this.terIndices.has(i)) {
                const asymIdCount = this.asymIdCounts.get(asymId) + 1;
                this.asymIdCounts.set(asymId, asymIdCount);
                this.currLabelAsymId = `${asymId}_${asymIdCount}`;
            }
        }
        else {
            this.asymIdCounts.set(asymId, 0);
        }
        return this.currLabelAsymId;
    }
}
/**
 * Builds the `atom_site` fields from the columns collected in `sites`.
 *
 * Most fields wrap the collected columns directly. Three label columns are
 * derived row by row:
 * - `label_asym_id`: whatever `labelAsymIdHelper.get(i)` returns for the row;
 * - `label_atom_id`: the auth atom name, suffixed with `_<n>` when the same name
 *   repeats within a run of rows sharing model, chain, residue number and
 *   insertion code;
 * - `label_seq_id`: only produced when at least one row has a non-empty insertion
 *   code. It follows the auth residue number until the two diverge, and from
 *   then on increases by one per residue change within the chain. Otherwise the
 *   field is created via `CifField.ofUndefined`.
 *
 * `label_comp_id` reuses the `auth_comp_id` field, and `occupancy` is created via
 * `CifField.ofUndefined` when `areTokensEmpty(sites.occupancy)` holds.
 *
 * @param sites column buffers (same keys as produced by `getAtomSiteTemplate`)
 * @param labelAsymIdHelper object with `clear()` and `get(i)`; `clear()` is called once up front
 * @param options not used by this function; kept so existing callers keep working
 * @returns object mapping `atom_site` field names to fields
 */
export function getAtomSite(sites, labelAsymIdHelper, options) {
    labelAsymIdHelper.clear();
    const pdbx_PDB_model_num = CifField.ofStrings(sites.pdbx_PDB_model_num);
    const auth_asym_id = CifField.ofTokens(sites.auth_asym_id);
    const auth_seq_id = CifField.ofTokens(sites.auth_seq_id);
    const pdbx_PDB_ins_code = CifField.ofTokens(sites.pdbx_PDB_ins_code);
    const auth_atom_id = CifField.ofTokens(sites.auth_atom_id);
    const auth_comp_id = CifField.ofTokens(sites.auth_comp_id);
    const id = CifField.ofStrings(sites.id);
    // state of the residue currently being walked, seeded from the first row
    let currModelNum = pdbx_PDB_model_num.str(0);
    let currAsymId = auth_asym_id.str(0);
    let currSeqId = auth_seq_id.int(0);
    let currInsCode = pdbx_PDB_ins_code.str(0);
    let currLabelSeqId = currSeqId;
    // occurrences of each atom name within the current residue
    const atomIdCounts = new Map();
    const labelAsymIds = [];
    const labelAtomIds = [];
    const labelSeqIds = [];
    // serial label_seq_id if there are ins codes
    let hasInsCode = false;
    for (let i = 0, il = id.rowCount; i < il; ++i) {
        if (pdbx_PDB_ins_code.str(i) !== '') {
            hasInsCode = true;
            break;
        }
    }
    // ensure unique asym ids per model and unique atom ids per seq id
    for (let i = 0, il = id.rowCount; i < il; ++i) {
        const modelNum = pdbx_PDB_model_num.str(i);
        const asymId = auth_asym_id.str(i);
        const seqId = auth_seq_id.int(i);
        const insCode = pdbx_PDB_ins_code.str(i);
        let atomId = auth_atom_id.str(i);
        if (modelNum !== currModelNum) {
            // new model: restart everything from this row
            atomIdCounts.clear();
            currModelNum = modelNum;
            currAsymId = asymId;
            currSeqId = seqId;
            currInsCode = insCode;
            currLabelSeqId = seqId;
        }
        else if (currAsymId !== asymId) {
            // new chain: the serial numbering restarts at this row's residue number
            atomIdCounts.clear();
            currAsymId = asymId;
            currSeqId = seqId;
            currInsCode = insCode;
            currLabelSeqId = seqId;
        }
        else if (currSeqId !== seqId) {
            // new residue number: keep following the auth numbering while both are
            // still in sync, otherwise just count up
            atomIdCounts.clear();
            if (currSeqId === currLabelSeqId) {
                currLabelSeqId = seqId;
            }
            else {
                currLabelSeqId += 1;
            }
            currSeqId = seqId;
            currInsCode = insCode;
        }
        else if (currInsCode !== insCode) {
            // same residue number but different insertion code: treated as the next residue
            atomIdCounts.clear();
            currInsCode = insCode;
            currLabelSeqId += 1;
        }
        labelAsymIds[i] = labelAsymIdHelper.get(i);
        if (atomIdCounts.has(atomId)) {
            const atomIdCount = atomIdCounts.get(atomId) + 1;
            atomIdCounts.set(atomId, atomIdCount);
            atomId = `${atomId}_${atomIdCount}`;
        }
        else {
            atomIdCounts.set(atomId, 0);
        }
        labelAtomIds[i] = atomId;
        if (hasInsCode) {
            labelSeqIds[i] = currLabelSeqId;
        }
    }
    const labelAsymId = Column.ofStringArray(labelAsymIds);
    const labelAtomId = Column.ofStringArray(labelAtomIds);
    const label_seq_id = hasInsCode
        ? CifField.ofColumn(Column.ofIntArray(labelSeqIds))
        : CifField.ofUndefined(sites.index, Column.Schema.int);
    return {
        auth_asym_id,
        auth_atom_id,
        auth_comp_id,
        auth_seq_id,
        B_iso_or_equiv: CifField.ofTokens(sites.B_iso_or_equiv),
        Cartn_x: CifField.ofTokens(sites.Cartn_x),
        Cartn_y: CifField.ofTokens(sites.Cartn_y),
        Cartn_z: CifField.ofTokens(sites.Cartn_z),
        group_PDB: CifField.ofTokens(sites.group_PDB),
        id,
        label_alt_id: CifField.ofTokens(sites.label_alt_id),
        label_asym_id: CifField.ofColumn(labelAsymId),
        label_atom_id: CifField.ofColumn(labelAtomId),
        label_comp_id: auth_comp_id,
        label_seq_id,
        label_entity_id: CifField.ofStrings(sites.label_entity_id),
        occupancy: areTokensEmpty(sites.occupancy) ? CifField.ofUndefined(sites.index, Column.Schema.float) : CifField.ofTokens(sites.occupancy),
        type_symbol: CifField.ofTokens(sites.type_symbol),
        pdbx_PDB_ins_code: CifField.ofTokens(sites.pdbx_PDB_ins_code),
        pdbx_PDB_model_num,
        partial_charge: CifField.ofTokens(sites.partial_charge)
    };
}
/**
 * Reads one fixed-column atom record and appends its fields to `sites`.
 *
 * Token columns receive start/end offsets into `data.data`; `sites.id` and
 * `sites.pdbx_PDB_model_num` receive plain values at position `sites.index`,
 * which is incremented at the end.
 *
 * Fields that lie (partly) beyond the end of the record are never read from
 * the characters that follow it: B-factor and element fall back to an empty
 * token / a guess from the atom name, occupancy and partial charge are clamped
 * to the record end.
 *
 * @param sites column buffers (same keys as produced by `getAtomSiteTemplate`)
 * @param model value stored as the model number of this atom
 * @param data tokenizer state; `data.data` is the text, `tokenStart`/`tokenEnd` are set by `Tokenizer.trim`
 * @param s offset of the first character of the record in `data.data`
 * @param e end offset of the record (the record length is taken as `e - s`)
 * @param isPdbqt when truthy, columns 71 - 76 are stored as partial charge and the
 *        element is always guessed from the atom name
 */
export function addAtom(sites, model, data, s, e, isPdbqt) {
    const { data: str } = data;
    const length = e - s;
    // Adds the trimmed columns [s + from, s + to) as a token, never reaching past `e`;
    // adds an empty token when the record ends before the field starts.
    const addClampedField = (tokens, from, to) => {
        const start = s + from;
        const end = Math.min(s + to, e);
        if (start < end) {
            TokenBuilder.addToken(tokens, Tokenizer.trim(data, start, end));
        }
        else {
            TokenBuilder.add(tokens, 0, 0);
        }
    };
    // TODO: filter invalid atoms
    // COLUMNS        DATA TYPE       CONTENTS
    // --------------------------------------------------------------------------------
    //  1 -  6        Record name     "ATOM  "
    TokenBuilder.addToken(sites.group_PDB, Tokenizer.trim(data, s, s + 6));
    //  7 - 11        Integer         Atom serial number.
    // TODO: support HEX
    Tokenizer.trim(data, s + 6, s + 11);
    sites.id[sites.index] = str.substring(data.tokenStart, data.tokenEnd);
    // 13 - 16        Atom            Atom name.
    TokenBuilder.addToken(sites.auth_atom_id, Tokenizer.trim(data, s + 12, s + 16));
    // 17             Character       Alternate location indicator.
    if (str.charCodeAt(s + 16) === 32) { // ' '
        TokenBuilder.add(sites.label_alt_id, 0, 0);
    }
    else {
        TokenBuilder.add(sites.label_alt_id, s + 16, s + 17);
    }
    // 18 - 20        Residue name    Residue name.
    TokenBuilder.addToken(sites.auth_comp_id, Tokenizer.trim(data, s + 17, s + 20));
    // 22             Character       Chain identifier.
    TokenBuilder.add(sites.auth_asym_id, s + 21, s + 22);
    // 23 - 26        Integer         Residue sequence number.
    // TODO: support HEX
    TokenBuilder.addToken(sites.auth_seq_id, Tokenizer.trim(data, s + 22, s + 26));
    // 27             AChar           Code for insertion of residues.
    if (str.charCodeAt(s + 26) === 32) { // ' '
        TokenBuilder.add(sites.pdbx_PDB_ins_code, 0, 0);
    }
    else {
        TokenBuilder.add(sites.pdbx_PDB_ins_code, s + 26, s + 27);
    }
    // 31 - 38        Real(8.3)       Orthogonal coordinates for X in Angstroms.
    TokenBuilder.addToken(sites.Cartn_x, Tokenizer.trim(data, s + 30, s + 38));
    // 39 - 46        Real(8.3)       Orthogonal coordinates for Y in Angstroms.
    TokenBuilder.addToken(sites.Cartn_y, Tokenizer.trim(data, s + 38, s + 46));
    // 47 - 54        Real(8.3)       Orthogonal coordinates for Z in Angstroms.
    TokenBuilder.addToken(sites.Cartn_z, Tokenizer.trim(data, s + 46, s + 54));
    // 55 - 60        Real(6.2)       Occupancy.
    // clamped: a record that stops after the coordinates must not pick up the
    // beginning of whatever follows it as its occupancy
    addClampedField(sites.occupancy, 54, 60);
    // 61 - 66        Real(6.2)       Temperature factor (Default = 0.0).
    if (length >= 66) {
        TokenBuilder.addToken(sites.B_iso_or_equiv, Tokenizer.trim(data, s + 60, s + 66));
    }
    else {
        TokenBuilder.add(sites.B_iso_or_equiv, 0, 0);
    }
    // 71 - 76                        Partial charge, stored only when isPdbqt is set;
    //                                nothing is added to sites.partial_charge otherwise.
    if (isPdbqt) {
        addClampedField(sites.partial_charge, 70, 76);
    }
    // 77 - 78        LString(2)      Element symbol, right-justified.
    if (length >= 78 && !isPdbqt) {
        Tokenizer.trim(data, s + 76, s + 78);
        if (data.tokenStart < data.tokenEnd) {
            TokenBuilder.addToken(sites.type_symbol, data);
        }
        else {
            // blank element columns: derive the element from the atom name columns
            guessElementSymbolTokens(sites.type_symbol, str, s + 12, s + 16);
        }
    }
    else {
        guessElementSymbolTokens(sites.type_symbol, str, s + 12, s + 16);
    }
    // 79 - 80        LString(2)      charge Charge on the atom.
    // TODO
    sites.pdbx_PDB_model_num[sites.index] = model;
    sites.index++;
}