molstar
Version:
A comprehensive macromolecular library.
450 lines • 18.4 kB
JavaScript
/**
* Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
import { __assign } from "tslib";
import { EnumCol, StrCol, IntCol, ListCol, FloatCol, CoordCol, MatrixCol, VectorCol } from './schema';
import { parseImportGet } from './helper';
/**
 * Lookup from dictionary type code to a builder `(description, values, container) => column`.
 * Built once; the builders only touch the column factories when they are invoked.
 */
const FIELD_TYPE_BUILDERS = (() => {
    const builders = new Map();
    const register = (codes, build) => {
        codes.forEach((code) => builders.set(code, build));
    };

    // mmCIF: string codes that become a string enum when enumeration values are given
    register(
        ['code', 'ucode', 'line', 'uline', 'text', 'char', 'uchar3', 'uchar1', 'boolean'],
        (description, values) => (values && values.length
            ? EnumCol(values, 'str', description)
            : StrCol(description)),
    );

    // mmCIF: codes that are always plain strings
    register(
        [
            'aliasname', 'name', 'idname', 'any', 'atcode', 'fax', 'phone', 'email',
            'code30', 'seq-one-letter-code', 'author', 'orcid_id',
            'pdbx_PDB_obsoleted_db_id', 'pdbx_related_db_id', 'sequence_dep',
            'pdb_id', 'emd_id',
            // todo, consider adding specialised fields
            'yyyy-mm-dd', 'yyyy-mm-dd:hh:mm', 'yyyy-mm-dd:hh:mm-flex',
            'int-range', 'float-range', 'binary', 'operation_expression',
            'point_symmetry', '4x3_matrix', '3x4_matrices', 'point_group',
            'point_group_helical', 'symmetry_operation', 'date_dep', 'url',
            'symop', 'exp_data_doi', 'asym_id',
        ],
        (description) => StrCol(description),
    );

    // mmCIF: integer codes that become an int enum when enumeration values are given
    register(
        ['int', 'non_negative_int', 'positive_int'],
        (description, values) => (values && values.length
            ? EnumCol(values, 'int', description)
            : IntCol(description)),
    );

    register(['float'], (description) => FloatCol(description));

    // mmCIF: list-valued codes
    register(
        ['ec-type', 'ucode-alphanum-csv', 'id_list'],
        (description) => ListCol('str', ',', description),
    );
    register(['id_list_spc'], (description) => ListCol('str', ' ', description));

    // cif: the container decides between a list column and a scalar column
    register(
        [
            'Text', 'Code', 'Complex', 'Symop', 'List', 'List(Real,Real)',
            'List(Real,Real,Real,Real)', 'Date', 'DateTime', 'Tag', 'Implied',
        ],
        (description, values, container) => wrapContainer('str', ',', description, container),
    );
    register(
        ['Real'],
        (description, values, container) => wrapContainer('float', ',', description, container),
    );
    register(
        ['Integer'],
        (description, values, container) => wrapContainer('int', ',', description, container),
    );

    return builders;
})();

/**
 * Map a dictionary type code to a schema column.
 *
 * @param type type code as found in the dictionary (mmCIF or cif spelling)
 * @param description text attached to the created column
 * @param values optional enumeration values; only used by the enumerable string and int codes
 * @param container optional container name, forwarded to `wrapContainer` for the cif codes
 * @returns the column for `type`; unknown codes are logged and yield a string column
 */
export function getFieldType(type, description, values, container) {
    const build = FIELD_TYPE_BUILDERS.get(type);
    if (build) {
        return build(description, values, container);
    }
    console.log("unknown type '" + type + "'");
    return StrCol(description);
}
/**
 * Create a scalar column for one of the primitive kinds.
 *
 * @param type one of 'int', 'str', 'float' or 'coord'
 * @param description text attached to the created column
 * @returns the matching column, or `undefined` for any other `type`
 */
function ColFromType(type, description) {
    if (type === 'int') {
        return IntCol(description);
    }
    if (type === 'str') {
        return StrCol(description);
    }
    if (type === 'float') {
        return FloatCol(description);
    }
    if (type === 'coord') {
        return CoordCol(description);
    }
    return undefined;
}
/**
 * Create a list column when the container is 'List', otherwise a scalar column.
 *
 * @param type element kind ('int', 'str', 'float' or 'coord')
 * @param separator separator passed to the list column
 * @param description text attached to the created column
 * @param container container name from the dictionary; may be missing
 * @returns a `ListCol` for container 'List', else whatever `ColFromType` returns
 */
function wrapContainer(type, separator, description, container) {
    if (container === 'List') {
        return ListCol(type, separator, description);
    }
    return ColFromType(type, description);
}
/**
 * Resolve the save frames that frame `d` imports.
 *
 * Reads the first value of the `get` field of the `import` category of `d`,
 * parses it with `parseImportGet`, and looks each resulting `{ file, save }`
 * entry up in `imports`. Save frame headers are compared case-insensitively.
 * Entries that cannot be resolved are reported with `console.warn` and skipped.
 *
 * @param d save frame to resolve imports for
 * @param imports map from file name to the save frames of that file
 * @returns the resolved save frames, in the order of the import entries
 */
function getImportFrames(d, imports) {
    const resolved = [];
    if (!('import' in d.categories)) {
        return resolved;
    }

    const requests = parseImportGet(d.categories['import'].getField('get').str(0));
    for (const request of requests) {
        const { file, save } = request;
        if (!file || !save) {
            console.warn(`missing 'save' or 'file' for import in '${d.header}'`);
            continue;
        }

        const candidates = imports.get(file);
        if (!candidates) {
            console.warn(`missing '${file}' entry in imports`);
            continue;
        }

        const match = candidates.find((frame) => frame.header.toLowerCase() === save.toLowerCase());
        if (!match) {
            console.warn(`missing '${save}' save frame in '${file}'`);
            continue;
        }

        resolved.push(match);
    }
    return resolved;
}
/**
 * Get a field from the given frame, or from a linked or imported frame.
 *
 * Resolution order:
 *  1. `d` itself has `category`: return that category's `field`.
 *  2. `d.header` has an entry in `ctx.links`: retry on the linked frame from
 *     `ctx.categories`. A link whose target is not in `ctx.categories` resolves
 *     to `undefined`; imports are not consulted in that case.
 *  3. Otherwise try the frames imported by `d` (see `getImportFrames`) in order
 *     and return the first field found.
 *
 * @param category name of the category holding the field
 * @param field name of the field within the category
 * @param d save frame to start the lookup from
 * @param imports map from file name to the save frames of that file
 * @param ctx `{ categories, links }` lookup tables built by `generateSchema`
 * @returns the field, or `undefined` when it cannot be resolved
 */
function getField(category, field, d, imports, ctx) {
    const { categories, links } = ctx;

    const cat = d.categories[category];
    if (cat) {
        return cat.getField(field);
    }

    if (d.header in links) {
        const linkName = links[d.header];
        if (linkName in categories) {
            return getField(category, field, categories[linkName], imports, ctx);
        }
        return undefined;
    }

    // Keep looking when an imported frame does not provide the field; returning
    // unconditionally here would hide everything but the first import.
    for (const importFrame of getImportFrames(d, imports)) {
        const found = getField(category, field, importFrame, imports, ctx);
        if (found) {
            return found;
        }
    }
    return undefined;
}
/**
 * Collect the enumeration values declared for frame `d`.
 *
 * @param d save frame of the item
 * @param imports map from file name to the save frames of that file
 * @param ctx `{ categories, links }` lookup tables
 * @returns every row of `item_enumeration.value` as a string, or `undefined`
 *          when that field cannot be resolved
 */
function getEnums(d, imports, ctx) {
    const valueField = getField('item_enumeration', 'value', d, imports, ctx);
    if (!valueField) {
        return undefined;
    }

    const collected = [];
    for (let row = 0; row < valueField.rowCount; row += 1) {
        collected.push(valueField.str(row));
    }
    return collected;
}
/**
 * Read the container declared for frame `d`.
 *
 * @param d save frame of the item
 * @param imports map from file name to the save frames of that file
 * @param ctx `{ categories, links }` lookup tables
 * @returns the first value of `type.container`, or `undefined` when that field
 *          cannot be resolved
 */
function getContainer(d, imports, ctx) {
    const containerField = getField('type', 'container', d, imports, ctx);
    if (!containerField) {
        return undefined;
    }
    return containerField.str(0);
}
/**
 * Gather the type information of frame `d`.
 *
 * The type code is taken from `item_type.code`, falling back to `type.contents`.
 *
 * @param d save frame of the item
 * @param imports map from file name to the save frames of that file
 * @param ctx `{ categories, links }` lookup tables
 * @returns `[typeCode, enumValues, container]`, or `undefined` (after logging)
 *          when neither type field can be resolved
 */
function getCode(d, imports, ctx) {
    const codeField = getField('item_type', 'code', d, imports, ctx)
        || getField('type', 'contents', d, imports, ctx);
    if (!codeField) {
        console.log("item_type.code or type.contents not found for '" + d.header + "'");
        return undefined;
    }

    const typeCode = codeField.str(0);
    const enumValues = getEnums(d, imports, ctx);
    const container = getContainer(d, imports, ctx);
    return [typeCode, enumValues, container];
}
/**
 * Read the sub category declared for frame `d`.
 *
 * @param d save frame of the item
 * @param imports map from file name to the save frames of that file
 * @param ctx `{ categories, links }` lookup tables
 * @returns the first value of `item_sub_category.id`, or `undefined` when that
 *          field cannot be resolved
 */
function getSubCategory(d, imports, ctx) {
    const idField = getField('item_sub_category', 'id', d, imports, ctx);
    return idField ? idField.str(0) : undefined;
}
/**
 * Read and tidy the description of frame `d`.
 *
 * The text is taken from `item_description.description`, falling back to
 * `description.text`. It is trimmed, indentation following a line break is
 * removed, and the first run of `[1]`..`[3]` index markers is dropped (a run
 * followed by " element" is replaced by "elements").
 *
 * @param d save frame of the item
 * @param imports map from file name to the save frames of that file
 * @param ctx `{ categories, links }` lookup tables
 * @returns the cleaned description, or `undefined` when no description field
 *          can be resolved
 */
function getDescription(d, imports, ctx) {
    const textField = getField('item_description', 'description', d, imports, ctx)
        || getField('description', 'text', d, imports, ctx);
    if (!textField) {
        return undefined;
    }

    const trimmed = textField.str(0).trim();
    // remove padding after newlines
    const unpadded = trimmed.replace(/(\r\n|\r|\n)([ \t]+)/g, '\n');
    // remove references to square brackets (first occurrence only)
    const pluralised = unpadded.replace(/(\[[1-3]\])+ element/, 'elements');
    return pluralised.replace(/(\[[1-3]\])+/, '');
}
/**
 * Read the alias names of frame `d`.
 *
 * Names come from `item_aliases.alias_name`, falling back to
 * `alias.definition_id`. The first character of every name is dropped
 * (presumably the leading '_' of a data name - confirm against the dictionaries).
 *
 * @param d save frame of the item
 * @param imports map from file name to the save frames of that file
 * @param ctx `{ categories, links }` lookup tables
 * @returns the alias names without their first character, or `undefined` when
 *          no alias field can be resolved
 */
function getAliases(d, imports, ctx) {
    const nameField = getField('item_aliases', 'alias_name', d, imports, ctx)
        || getField('alias', 'definition_id', d, imports, ctx);
    if (!nameField) {
        return undefined;
    }
    return nameField.toStringArray().map((name) => name.slice(1));
}
// Matches a two-index marker such as `[1][2]` (each index 1-3) anywhere in an item name.
// generateSchema removes the match to obtain the name of the 3x3 matrix column.
var reMatrixField = /\[[1-3]\]\[[1-3]\]/;
// Matches a single-index marker such as `[2]` (index 1-3) anywhere in an item name.
// generateSchema removes the match to obtain the name of the 3-vector column.
// It also matches inside a two-index marker, so the matrix pattern is tested first there.
var reVectorField = /\[[1-3]\]/;
// Item headers that generateSchema always turns into an int column, without
// consulting the type code declared in the dictionary. Entries are compared
// against the full frame header, including the leading '_'.
var FORCE_INT_FIELDS = [
'_atom_site.id',
'_atom_site.auth_seq_id',
'_atom_site_anisotrop.id',
'_pdbx_struct_mod_residue.auth_seq_id',
'_struct_conf.beg_auth_seq_id',
'_struct_conf.end_auth_seq_id',
'_struct_conn.ptnr1_auth_seq_id',
'_struct_conn.ptnr2_auth_seq_id',
'_struct_sheet_range.beg_auth_seq_id',
'_struct_sheet_range.end_auth_seq_id',
];
/**
* Maps the header of a matrix member item to the name of the 3x3 matrix column
* that generateSchema adds to the same table. The member itself is kept as a
* float column.
*
* Note that name and mapped name must share a prefix. This is not always the case in
* the cifCore dictionary, but for downstream code to work a container field with the
* same prefix as the member fields must be given here and in the field names filter
* list.
*/
var FORCE_MATRIX_FIELDS_MAP = {
'atom_site_aniso.u_11': 'u',
'atom_site_aniso.u_22': 'u',
'atom_site_aniso.u_33': 'u',
'atom_site_aniso.u_23': 'u',
'atom_site_aniso.u_13': 'u',
'atom_site_aniso.u_12': 'u',
};
// Headers of all forced matrix members, used for membership tests in generateSchema.
var FORCE_MATRIX_FIELDS = Object.keys(FORCE_MATRIX_FIELDS_MAP);
// Aliases that generateSchema copies into its `aliases` result before adding the
// aliases read from the dictionary frames. Keys have the form `category.item`.
var EXTRA_ALIASES = {
'atom_site_aniso.matrix_u': [
'atom_site_anisotrop_U',
'atom_site_aniso.U'
],
};
// Item headers whose values are treated as lists although the dictionary types
// them as strings. generateSchema consults these three arrays only when the
// type derived from the dictionary is a plain 'str' column, and then replaces it
// with a string list column using the separator named by the array.

// Separator ','
var COMMA_SEPARATED_LIST_FIELDS = [
'_atom_site.pdbx_struct_group_id',
'_chem_comp.mon_nstd_parent_comp_id',
'_diffrn_radiation.pdbx_wavelength_list',
'_diffrn_source.pdbx_wavelength_list',
'_em_diffraction.tilt_angle_list',
'_em_entity_assembly.entity_id_list',
'_entity.pdbx_description',
'_entity.pdbx_ec',
'_entity_poly.pdbx_strand_id',
'_entity_src_gen.pdbx_gene_src_gene',
'_pdbx_depui_entry_details.experimental_methods',
'_pdbx_depui_entry_details.requested_accession_types',
'_pdbx_soln_scatter_model.software_list',
'_pdbx_soln_scatter_model.software_author_list',
'_pdbx_soln_scatter_model.entry_fitting_list',
'_pdbx_struct_assembly_gen.entity_inst_id',
'_pdbx_struct_assembly_gen.asym_id_list',
'_pdbx_struct_assembly_gen.auth_asym_id_list',
'_pdbx_struct_assembly_gen_depositor_info.asym_id_list',
'_pdbx_struct_assembly_gen_depositor_info.chain_id_list',
'_pdbx_struct_group_list.group_enumeration_type',
'_reflns.pdbx_diffrn_id',
'_refine.pdbx_diffrn_id',
'_reflns_shell.pdbx_diffrn_id',
'_struct_keywords.text',
];
// Separator ' '
var SPACE_SEPARATED_LIST_FIELDS = [
'_chem_comp.pdbx_subcomponent_list',
'_pdbx_soln_scatter.data_reduction_software_list',
'_pdbx_soln_scatter.data_analysis_software_list', // SCTPL5 GNOM
];
// Separator ';'
var SEMICOLON_SEPARATED_LIST_FIELDS = [
'_chem_comp.pdbx_synonyms' // GLYCERIN; PROPANE-1,2,3-TRIOL
];
/**
* Useful when a dictionary extension will add enum values to an existing dictionary.
* By adding them here, the dictionary extension can be tested before the added enum
* values are available in the existing dictionary.
*
* Keyed by item header; each value is an array of enum values that generateSchema
* appends to the `values` of the item's enum column. A header listed here whose
* column is not an enum only produces a warning.
*/
var EXTRA_ENUM_VALUES = {};
/**
 * Build table and column metadata from the save frames of a CIF dictionary.
 *
 * Works in three passes over `frames`:
 *  1. Category frames (header without a leading '_' and without a '.') create
 *     an entry in `tables` holding description and key names.
 *  2. All other frames are item frames; they are indexed by header and their
 *     `item_linked` child -> parent relations are recorded.
 *  3. Every item frame becomes a column in the table of its category.
 *
 * @param frames save frames of the dictionary; each needs `header` and `categories`
 * @param imports map from file name to the save frames of that file, used to
 *        resolve imported definitions; defaults to an empty Map
 * @returns `{ tables, aliases }` where `tables[category]` is
 *          `{ description, key, columns }` and `aliases` maps `category.item`
 *          to its alias names
 */
export function generateSchema(frames, imports = new Map()) {
    const tables = {};
    const aliases = __assign({}, EXTRA_ALIASES);
    const categories = {};
    const links = {};
    const ctx = { categories, links };

    // get category metadata
    frames.forEach((d) => {
        // item definitions start with '_' (mmCIF) or include a '.' (mmCIF and cifCore);
        // every other frame is a category definition
        if (d.header[0] === '_' || d.header.includes('.')) {
            return;
        }
        const categoryName = d.header.toLowerCase();
        const categoryKeyNames = new Set();
        let descriptionField;

        if ('category' in d.categories && 'category_key' in d.categories) {
            const category = d.categories['category'];
            const categoryKey = d.categories['category_key'];
            if (categoryKey) {
                const keyNames = categoryKey.getField('name');
                if (keyNames) {
                    for (let i = 0, il = keyNames.rowCount; i < il; ++i) {
                        categoryKeyNames.add(keyNames.str(i));
                    }
                }
            }
            descriptionField = category.getField('description');
            if (categoryKeyNames.size === 0) {
                console.log(`no key given for category '${categoryName}'`);
            }
        }
        // a `description` category takes precedence over `category.description`
        if ('description' in d.categories) {
            descriptionField = d.categories['description'].getField('text');
        }

        let description = '';
        if (descriptionField) {
            description = descriptionField.str(0).trim()
                .replace(/(\r\n|\r|\n)([ \t]+)/g, '\n'); // remove padding after newlines
        } else {
            console.log(`no description given for category '${categoryName}'`);
        }

        tables[categoryName] = { description, key: categoryKeyNames, columns: {} };
    });

    // index item frames and build list of links between them
    frames.forEach((d) => {
        if (d.header[0] !== '_' && !d.header.includes('.')) {
            return;
        }
        categories[d.header] = d;

        const itemLinked = d.categories['item_linked'];
        if (!itemLinked) {
            return;
        }
        const childNames = itemLinked.getField('child_name');
        const parentNames = itemLinked.getField('parent_name');
        if (!childNames || !parentNames) {
            return;
        }
        for (let i = 0; i < itemLinked.rowCount; ++i) {
            const childName = childNames.str(i);
            const parentName = parentNames.str(i);
            if (childName in links && links[childName] !== parentName) {
                // NOTE(review): the message says the new link is ignored, but the
                // assignment below replaces the existing link - confirm which is intended
                console.log(`${childName} linked to ${links[childName]}, ignoring link to ${parentName}`);
            }
            links[childName] = parentName;
        }
    });

    // get field data
    Object.keys(categories).forEach((fullName) => {
        const d = categories[fullName];
        const dotIndex = d.header.indexOf('.');
        const categoryName = d.header.substring(d.header[0] === '_' ? 1 : 0, dotIndex);
        const itemName = d.header.substring(dotIndex + 1);

        let fields;
        if (categoryName in tables) {
            fields = tables[categoryName].columns;
            // NOTE(review): this adds every item of a known category to its key set,
            // not only the names listed in `category_key` - confirm this is intended
            tables[categoryName].key.add(itemName);
        } else if (categoryName.toLowerCase() in tables) {
            // take case from category name in 'field' data as it is better if data is from cif dictionaries
            tables[categoryName] = tables[categoryName.toLowerCase()];
            fields = tables[categoryName].columns;
        } else {
            console.log(`category '${categoryName}' has no metadata`);
            fields = {};
            tables[categoryName] = { description: '', key: new Set(), columns: fields };
        }

        const itemAliases = getAliases(d, imports, ctx);
        if (itemAliases) {
            aliases[`${categoryName}.${itemName}`] = itemAliases;
        }

        const description = getDescription(d, imports, ctx) || '';

        // need to use regex to check for matrix or vector items
        // as sub_category assignment is missing for some entries
        const subCategory = getSubCategory(d, imports, ctx);
        if (subCategory === 'cartesian_coordinate' || subCategory === 'fractional_coordinate') {
            fields[itemName] = CoordCol(description);
        } else if (FORCE_INT_FIELDS.includes(d.header)) {
            fields[itemName] = IntCol(description);
            console.log(`forcing int: ${d.header}`);
        } else if (FORCE_MATRIX_FIELDS.includes(d.header)) {
            // keep the member as a float column and add the matrix column it belongs to
            fields[itemName] = FloatCol(description);
            fields[FORCE_MATRIX_FIELDS_MAP[d.header]] = MatrixCol(3, 3, description);
            console.log(`forcing matrix: ${d.header}`);
        } else if (subCategory === 'matrix') {
            fields[itemName.replace(reMatrixField, '')] = MatrixCol(3, 3, description);
        } else if (subCategory === 'vector') {
            fields[itemName.replace(reVectorField, '')] = VectorCol(3, description);
        } else if (reMatrixField.test(itemName)) {
            fields[itemName.replace(reMatrixField, '')] = MatrixCol(3, 3, description);
            console.log(`${d.header} should have 'matrix' _item_sub_category.id`);
        } else if (reVectorField.test(itemName)) {
            fields[itemName.replace(reVectorField, '')] = VectorCol(3, description);
            console.log(`${d.header} should have 'vector' _item_sub_category.id`);
        } else {
            const code = getCode(d, imports, ctx);
            if (!code) {
                fields[itemName] = StrCol(description);
                return;
            }

            let fieldType = getFieldType(code[0], description, code[1], code[2]);
            if (fieldType.type === 'str') {
                // some string items hold lists that the dictionary does not declare as such
                if (COMMA_SEPARATED_LIST_FIELDS.includes(d.header)) {
                    fieldType = ListCol('str', ',', description);
                    console.log(`forcing comma separated: ${d.header}`);
                } else if (SPACE_SEPARATED_LIST_FIELDS.includes(d.header)) {
                    fieldType = ListCol('str', ' ', description);
                    console.log(`forcing space separated: ${d.header}`);
                } else if (SEMICOLON_SEPARATED_LIST_FIELDS.includes(d.header)) {
                    fieldType = ListCol('str', ';', description);
                    console.log(`forcing semicolon separated: ${d.header}`);
                }
            }
            if (d.header in EXTRA_ENUM_VALUES) {
                if (fieldType.type === 'enum') {
                    fieldType.values.push(...EXTRA_ENUM_VALUES[d.header]);
                } else {
                    console.warn(`expected enum: ${d.header}`);
                }
            }
            fields[itemName] = fieldType;
        }
    });

    return { tables, aliases };
}
//# sourceMappingURL=cif-dic.js.map