UNPKG

molstar

Version:

A comprehensive macromolecular library.

450 lines 18.4 kB
/**
 * Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
 *
 * @author Alexander Rose <alexander.rose@weirdbyte.de>
 */
import { __assign } from "tslib";
import { EnumCol, StrCol, IntCol, ListCol, FloatCol, CoordCol, MatrixCol, VectorCol } from './schema';
import { parseImportGet } from './helper';

/**
 * Map a dictionary type code to a schema column.
 *
 * @param type type code, as read from `item_type.code` or `type.contents` (see `getCode`)
 * @param description human readable description attached to the column
 * @param values optional enumeration values; when non-empty, the string-like and
 *     int-like codes of the first section yield enum columns
 * @param container optional `type.container` value; `'List'` turns the codes of the
 *     second section into list columns
 * @returns the column for `type`; unknown codes are logged and mapped to a string column
 */
export function getFieldType(type, description, values, container) {
    switch (type) {
        // mmCIF
        case 'code':
        case 'ucode':
        case 'line':
        case 'uline':
        case 'text':
        case 'char':
        case 'uchar3':
        case 'uchar1':
        case 'boolean':
            return values && values.length ? EnumCol(values, 'str', description) : StrCol(description);
        case 'aliasname':
        case 'name':
        case 'idname':
        case 'any':
        case 'atcode':
        case 'fax':
        case 'phone':
        case 'email':
        case 'code30':
        case 'seq-one-letter-code':
        case 'author':
        case 'orcid_id':
        case 'pdbx_PDB_obsoleted_db_id':
        case 'pdbx_related_db_id':
        case 'sequence_dep':
        case 'pdb_id':
        case 'emd_id':
        // todo, consider adding specialised fields
        case 'yyyy-mm-dd':
        case 'yyyy-mm-dd:hh:mm':
        case 'yyyy-mm-dd:hh:mm-flex':
        case 'int-range':
        case 'float-range':
        case 'binary':
        case 'operation_expression':
        case 'point_symmetry':
        case '4x3_matrix':
        case '3x4_matrices':
        case 'point_group':
        case 'point_group_helical':
        case 'symmetry_operation':
        case 'date_dep':
        case 'url':
        case 'symop':
        case 'exp_data_doi':
        case 'asym_id':
            return StrCol(description);
        case 'int':
        case 'non_negative_int':
        case 'positive_int':
            return values && values.length ? EnumCol(values, 'int', description) : IntCol(description);
        case 'float':
            return FloatCol(description);
        case 'ec-type':
        case 'ucode-alphanum-csv':
        case 'id_list':
            return ListCol('str', ',', description);
        case 'id_list_spc':
            return ListCol('str', ' ', description);

        // cif
        case 'Text':
        case 'Code':
        case 'Complex':
        case 'Symop':
        case 'List':
        case 'List(Real,Real)':
        case 'List(Real,Real,Real,Real)':
        case 'Date':
        case 'DateTime':
        case 'Tag':
        case 'Implied':
            return wrapContainer('str', ',', description, container);
        case 'Real':
            return wrapContainer('float', ',', description, container);
        case 'Integer':
            return wrapContainer('int', ',', description, container);
    }
    console.log(`unknown type '${type}'`);
    return StrCol(description);
}

/**
 * Create a scalar column for a primitive type name.
 *
 * @returns the column, or `undefined` when `type` is not one of
 *     `'int'`, `'str'`, `'float'` or `'coord'`
 */
function ColFromType(type, description) {
    switch (type) {
        case 'int':
            return IntCol(description);
        case 'str':
            return StrCol(description);
        case 'float':
            return FloatCol(description);
        case 'coord':
            return CoordCol(description);
    }
}

/**
 * Create a list column when `container` is `'List'`, a scalar column otherwise.
 */
function wrapContainer(type, separator, description, container) {
    return container === 'List'
        ? ListCol(type, separator, description)
        : ColFromType(type, description);
}

/**
 * Resolve the save frames referenced by the `import.get` field of frame `d`.
 *
 * Entries that cannot be resolved (missing `file`/`save`, unknown file, unknown
 * save frame) are reported with `console.warn` and skipped.
 *
 * @param d frame to resolve imports for
 * @param imports map from file name to the frames of that file
 * @returns the resolved frames, in the order they are listed; empty when `d` has no `import` category
 */
function getImportFrames(d, imports) {
    const frames = [];
    if (!('import' in d.categories)) return frames;

    const importGet = parseImportGet(d.categories['import'].getField('get').str(0));
    for (const { file, save } of importGet) {
        if (!file || !save) {
            console.warn(`missing 'save' or 'file' for import in '${d.header}'`);
            continue;
        }
        const importFrames = imports.get(file);
        if (!importFrames) {
            console.warn(`missing '${file}' entry in imports`);
            continue;
        }
        // save frame names are matched case-insensitively
        const importSave = importFrames.find((id) => id.header.toLowerCase() === save.toLowerCase());
        if (!importSave) {
            console.warn(`missing '${save}' save frame in '${file}'`);
            continue;
        }
        frames.push(importSave);
    }
    return frames;
}

/**
 * get field from given or linked category
 *
 * Lookup order: the frame's own category, then the frame it is linked to
 * (via `ctx.links`), then its imported frames. The first imported frame that
 * provides the field wins.
 *
 * @returns the field, or `undefined` when it cannot be found
 */
function getField(category, field, d, imports, ctx) {
    const { categories, links } = ctx;

    const cat = d.categories[category];
    if (cat) return cat.getField(field);

    if (d.header in links) {
        const linkName = links[d.header];
        // a link to a frame that was not collected yields no field
        return linkName in categories
            ? getField(category, field, categories[linkName], imports, ctx)
            : undefined;
    }

    // Try every imported frame; previously only the first one was ever
    // consulted because the loop returned unconditionally.
    for (const idf of getImportFrames(d, imports)) {
        const found = getField(category, field, idf, imports, ctx);
        if (found) return found;
    }
    return undefined;
}

/**
 * Collect the `item_enumeration.value` entries for frame `d`.
 *
 * @returns the values as strings, or `undefined` when the field is not available
 */
function getEnums(d, imports, ctx) {
    const value = getField('item_enumeration', 'value', d, imports, ctx);
    if (!value) return undefined;

    const enums = [];
    for (let i = 0; i < value.rowCount; ++i) {
        enums.push(value.str(i));
    }
    return enums;
}

/**
 * Read the first `type.container` value for frame `d`, if any.
 */
function getContainer(d, imports, ctx) {
    const value = getField('type', 'container', d, imports, ctx);
    return value ? value.str(0) : undefined;
}

/**
 * Determine the type information for frame `d`.
 *
 * @returns `[code, enumValues, container]`, or `undefined` (after logging) when neither
 *     `item_type.code` nor `type.contents` is available
 */
function getCode(d, imports, ctx) {
    const code = getField('item_type', 'code', d, imports, ctx)
        || getField('type', 'contents', d, imports, ctx);
    if (!code) {
        console.log(`item_type.code or type.contents not found for '${d.header}'`);
        return undefined;
    }
    return [code.str(0), getEnums(d, imports, ctx), getContainer(d, imports, ctx)];
}

/**
 * Read the first `item_sub_category.id` value for frame `d`, if any.
 */
function getSubCategory(d, imports, ctx) {
    const value = getField('item_sub_category', 'id', d, imports, ctx);
    return value ? value.str(0) : undefined;
}

/**
 * Read and clean up the description of frame `d`, taken from
 * `item_description.description` or `description.text`.
 *
 * @returns the cleaned description, or `undefined` when no description is available
 */
function getDescription(d, imports, ctx) {
    const value = getField('item_description', 'description', d, imports, ctx)
        || getField('description', 'text', d, imports, ctx);
    if (!value) return undefined;

    // trim (after newlines) and remove references to square brackets
    return value.str(0).trim()
        .replace(/(\r\n|\r|\n)([ \t]+)/g, '\n')
        .replace(/(\[[1-3]\])+ element/, 'elements')
        .replace(/(\[[1-3]\])+/, '');
}

/**
 * Read the alias names of frame `d` from `item_aliases.alias_name` or
 * `alias.definition_id`, dropping the first character of each name.
 *
 * @returns the alias names, or `undefined` when no alias field is available
 */
function getAliases(d, imports, ctx) {
    const value = getField('item_aliases', 'alias_name', d, imports, ctx)
        || getField('alias', 'definition_id', d, imports, ctx);
    return value ? value.toStringArray().map((v) => v.slice(1)) : undefined;
}

/** Matches a two-index suffix such as `[1][2]` in an item name. */
const reMatrixField = /\[[1-3]\]\[[1-3]\]/;
/** Matches a one-index suffix such as `[1]` in an item name. */
const reVectorField = /\[[1-3]\]/;

/** Frame headers whose column is always an int column, regardless of the dictionary type. */
const FORCE_INT_FIELDS = [
    '_atom_site.id',
    '_atom_site.auth_seq_id',
    '_atom_site_anisotrop.id',
    '_pdbx_struct_mod_residue.auth_seq_id',
    '_struct_conf.beg_auth_seq_id',
    '_struct_conf.end_auth_seq_id',
    '_struct_conn.ptnr1_auth_seq_id',
    '_struct_conn.ptnr2_auth_seq_id',
    '_struct_sheet_range.beg_auth_seq_id',
    '_struct_sheet_range.end_auth_seq_id',
];

/**
 * Note that name and mapped name must share a prefix. This is not always the case in
 * the cifCore dictionary, but for downstream code to work a container field with the
 * same prefix as the member fields must be given here and in the field names filter
 * list.
 */
const FORCE_MATRIX_FIELDS_MAP = {
    'atom_site_aniso.u_11': 'u',
    'atom_site_aniso.u_22': 'u',
    'atom_site_aniso.u_33': 'u',
    'atom_site_aniso.u_23': 'u',
    'atom_site_aniso.u_13': 'u',
    'atom_site_aniso.u_12': 'u',
};
const FORCE_MATRIX_FIELDS = Object.keys(FORCE_MATRIX_FIELDS_MAP);

/** Aliases that are always part of the generated schema. */
const EXTRA_ALIASES = {
    'atom_site_aniso.matrix_u': [
        'atom_site_anisotrop_U',
        'atom_site_aniso.U'
    ],
};

/** Frame headers whose string column is replaced by a comma separated list column. */
const COMMA_SEPARATED_LIST_FIELDS = [
    '_atom_site.pdbx_struct_group_id',
    '_chem_comp.mon_nstd_parent_comp_id',
    '_diffrn_radiation.pdbx_wavelength_list',
    '_diffrn_source.pdbx_wavelength_list',
    '_em_diffraction.tilt_angle_list',
    '_em_entity_assembly.entity_id_list',
    '_entity.pdbx_description',
    '_entity.pdbx_ec',
    '_entity_poly.pdbx_strand_id',
    '_entity_src_gen.pdbx_gene_src_gene',
    '_pdbx_depui_entry_details.experimental_methods',
    '_pdbx_depui_entry_details.requested_accession_types',
    '_pdbx_soln_scatter_model.software_list',
    '_pdbx_soln_scatter_model.software_author_list',
    '_pdbx_soln_scatter_model.entry_fitting_list',
    '_pdbx_struct_assembly_gen.entity_inst_id',
    '_pdbx_struct_assembly_gen.asym_id_list',
    '_pdbx_struct_assembly_gen.auth_asym_id_list',
    '_pdbx_struct_assembly_gen_depositor_info.asym_id_list',
    '_pdbx_struct_assembly_gen_depositor_info.chain_id_list',
    '_pdbx_struct_group_list.group_enumeration_type',
    '_reflns.pdbx_diffrn_id',
    '_refine.pdbx_diffrn_id',
    '_reflns_shell.pdbx_diffrn_id',
    '_struct_keywords.text',
];

/** Frame headers whose string column is replaced by a space separated list column. */
const SPACE_SEPARATED_LIST_FIELDS = [
    '_chem_comp.pdbx_subcomponent_list',
    '_pdbx_soln_scatter.data_reduction_software_list',
    '_pdbx_soln_scatter.data_analysis_software_list', // SCTPL5 GNOM
];

/** Frame headers whose string column is replaced by a semicolon separated list column. */
const SEMICOLON_SEPARATED_LIST_FIELDS = [
    '_chem_comp.pdbx_synonyms' // GLYCERIN; PROPANE-1,2,3-TRIOL
];

/**
 * Useful when a dictionary extension will add enum values to an existing dictionary.
 * By adding them here, the dictionary extension can be tested before the added enum
 * values are available in the existing dictionary.
 */
const EXTRA_ENUM_VALUES = {};

/**
 * Build a schema description from dictionary frames.
 *
 * Works in three passes: (1) collect category metadata (description and key
 * names), (2) collect item frames and the parent/child links between them,
 * (3) derive a column for every item frame.
 *
 * @param frames dictionary frames (category and item definitions)
 * @param imports map from file name to the frames of that file, used to resolve
 *     `import.get` references; defaults to an empty map
 * @returns `{ tables, aliases }` where `tables` maps category names to
 *     `{ description, key, columns }` and `aliases` maps `category.item` to alias names
 */
export function generateSchema(frames, imports = new Map()) {
    const tables = {};
    const aliases = __assign({}, EXTRA_ALIASES);
    const categories = {};
    const links = {};
    const ctx = { categories, links };

    // get category metadata
    frames.forEach((d) => {
        // category definitions in mmCIF start with '_' and don't include a '.'
        // category definitions in cifCore don't include a '.'
        if (d.header[0] === '_' || d.header.includes('.')) return;

        const categoryName = d.header.toLowerCase();
        const categoryKeyNames = new Set();
        let descriptionField;

        if ('category' in d.categories && 'category_key' in d.categories) {
            const category = d.categories['category'];
            const categoryKey = d.categories['category_key'];
            if (categoryKey) {
                const keyNames = categoryKey.getField('name');
                if (keyNames) {
                    for (let i = 0, il = keyNames.rowCount; i < il; ++i) {
                        categoryKeyNames.add(keyNames.str(i));
                    }
                }
            }
            descriptionField = category.getField('description');
            if (categoryKeyNames.size === 0) {
                console.log(`no key given for category '${categoryName}'`);
            }
        }

        // a 'description' category takes precedence over 'category.description'
        if ('description' in d.categories) {
            descriptionField = d.categories['description'].getField('text');
        }

        let description = '';
        if (descriptionField) {
            description = descriptionField.str(0).trim()
                .replace(/(\r\n|\r|\n)([ \t]+)/g, '\n'); // remove padding after newlines
        } else {
            console.log(`no description given for category '${categoryName}'`);
        }

        tables[categoryName] = { description, key: categoryKeyNames, columns: {} };
    });

    // build list of links between categories
    frames.forEach((d) => {
        if (d.header[0] !== '_' && !d.header.includes('.')) return;
        categories[d.header] = d;

        const itemLinked = d.categories['item_linked'];
        if (!itemLinked) return;

        const childNames = itemLinked.getField('child_name');
        const parentNames = itemLinked.getField('parent_name');
        if (!childNames || !parentNames) return;

        for (let i = 0; i < itemLinked.rowCount; ++i) {
            const childName = childNames.str(i);
            const parentName = parentNames.str(i);
            if (childName in links && links[childName] !== parentName) {
                console.log(`${childName} linked to ${links[childName]}, ignoring link to ${parentName}`);
            }
            // NOTE(review): the message above says the new link is ignored, but this
            // assignment replaces the earlier link - confirm which one is intended.
            links[childName] = parentName;
        }
    });

    // get field data
    Object.keys(categories).forEach((fullName) => {
        const d = categories[fullName];
        if (!d) {
            console.log(`'${fullName}' not found, moving on`);
            return;
        }

        const dotIndex = d.header.indexOf('.');
        const categoryName = d.header.substring(d.header[0] === '_' ? 1 : 0, dotIndex);
        const itemName = d.header.substring(dotIndex + 1);

        let fields;
        if (categoryName in tables) {
            fields = tables[categoryName].columns;
            tables[categoryName].key.add(itemName);
        } else if (categoryName.toLowerCase() in tables) {
            // take case from category name in 'field' data as it is better if data is from cif dictionaries
            tables[categoryName] = tables[categoryName.toLowerCase()];
            fields = tables[categoryName].columns;
        } else {
            console.log(`category '${categoryName}' has no metadata`);
            fields = {};
            tables[categoryName] = { description: '', key: new Set(), columns: fields };
        }

        const itemAliases = getAliases(d, imports, ctx);
        if (itemAliases) aliases[`${categoryName}.${itemName}`] = itemAliases;

        const description = getDescription(d, imports, ctx) || '';

        // need to use regex to check for matrix or vector items
        // as sub_category assignment is missing for some entries
        const subCategory = getSubCategory(d, imports, ctx);

        if (subCategory === 'cartesian_coordinate' || subCategory === 'fractional_coordinate') {
            fields[itemName] = CoordCol(description);
        } else if (FORCE_INT_FIELDS.includes(d.header)) {
            fields[itemName] = IntCol(description);
            console.log(`forcing int: ${d.header}`);
        } else if (FORCE_MATRIX_FIELDS.includes(d.header)) {
            // keep the member as a float column and add the mapped 3x3 matrix column
            fields[itemName] = FloatCol(description);
            fields[FORCE_MATRIX_FIELDS_MAP[d.header]] = MatrixCol(3, 3, description);
            console.log(`forcing matrix: ${d.header}`);
        } else if (subCategory === 'matrix') {
            fields[itemName.replace(reMatrixField, '')] = MatrixCol(3, 3, description);
        } else if (subCategory === 'vector') {
            fields[itemName.replace(reVectorField, '')] = VectorCol(3, description);
        } else if (reMatrixField.test(itemName)) {
            fields[itemName.replace(reMatrixField, '')] = MatrixCol(3, 3, description);
            console.log(`${d.header} should have 'matrix' _item_sub_category.id`);
        } else if (reVectorField.test(itemName)) {
            fields[itemName.replace(reVectorField, '')] = VectorCol(3, description);
            console.log(`${d.header} should have 'vector' _item_sub_category.id`);
        } else {
            const code = getCode(d, imports, ctx);
            if (!code) {
                // no type information available, fall back to a string column
                fields[itemName] = StrCol(description);
                return;
            }

            let fieldType = getFieldType(code[0], description, code[1], code[2]);
            if (fieldType.type === 'str') {
                if (COMMA_SEPARATED_LIST_FIELDS.includes(d.header)) {
                    fieldType = ListCol('str', ',', description);
                    console.log(`forcing comma separated: ${d.header}`);
                } else if (SPACE_SEPARATED_LIST_FIELDS.includes(d.header)) {
                    fieldType = ListCol('str', ' ', description);
                    console.log(`forcing space separated: ${d.header}`);
                } else if (SEMICOLON_SEPARATED_LIST_FIELDS.includes(d.header)) {
                    fieldType = ListCol('str', ';', description);
                    console.log(`forcing semicolon separated: ${d.header}`);
                }
            }
            if (d.header in EXTRA_ENUM_VALUES) {
                if (fieldType.type === 'enum') {
                    fieldType.values.push(...EXTRA_ENUM_VALUES[d.header]);
                } else {
                    console.warn(`expected enum: ${d.header}`);
                }
            }
            fields[itemName] = fieldType;
        }
    });

    return { tables, aliases };
}
//# sourceMappingURL=cif-dic.js.map