// crystcif-parse
// A parser for crystallographic CIF files
'use strict';
var mjs = require('mathjs');
var mndtable = require('mendeleev').PeriodicTable;
var utils = require('./utils.js');
var parseCif = require('./parse.js').parseCif;
var symmetry = require('./symmetry.js');
/*
Functions and classes useful to transform a parsed CIF data structure into a
series of crystallographic structures.
*/
/**
 * Convert a Cartesian cell into a lengths and angles representation.
 *
 * angles[i] is the angle between the two cell vectors other than row i, so
 * for rows a, b, c the angles come out in the order alpha, beta, gamma.
 *
 * @param  {Array}  cell     Cartesian form cell (must be 3x3)
 * @param  {bool}   radians  If true, angles are returned in radians;
 *                           otherwise they are passed through utils.radToDeg
 * @return {Array}           Parameters in form [lengths, angles]
 */
function cellToCellpar(cell, radians) {
    const lengths = cell.map((row) => mjs.norm(row));
    let angles = [];
    for (let i = 0; i < 3; ++i) {
        const j = (i + 2) % 3;
        const k = (i + 1) % 3;
        const ll = lengths[j] * lengths[k];
        // With a (near) zero-length vector the angle is undefined: report a
        // right angle rather than dividing by ~0.
        let angle = Math.PI / 2.0;
        if (ll > 1e-16) {
            const x = mjs.dot(cell[j], cell[k]) / ll;
            // Rounding can leave the cosine of (anti)parallel vectors a hair
            // outside [-1, 1], where Math.acos returns NaN; clamp it first.
            angle = Math.acos(Math.max(-1, Math.min(1, x)));
        }
        angles.push(angle);
    }
    if (!radians) {
        angles = angles.map(utils.radToDeg);
    }
    return [lengths, angles];
}
module.exports.cellToCellpar = cellToCellpar;
/**
 * Build the Cartesian cell matching a lengths and angles description.
 *
 * The cell is first expressed in a local frame whose Z axis is ab_normal and
 * whose X axis is the part of a_direction perpendicular to Z, then mapped to
 * the global frame. Rows of the result are the a, b and c vectors.
 *
 * @param  {Array}  cellpar      Lengths and angles form of cell (must be 2x3)
 * @param  {Array}  ab_normal    Desired direction for the normal to the AB
 *                               plane. Defaults to [0, 0, 1]
 * @param  {Array}  a_direction  Direction for the a parameter. Defaults to
 *                               [1, 0, 0], or to [0, 0, 1] when ab_normal is
 *                               itself parallel to the x axis
 * @param  {bool}   radians      If true, consider the angles in radians;
 *                               otherwise they go through utils.degToRad
 * @return {Array}               Cell in Cartesian form
 */
function cellparToCell(cellpar, ab_normal, a_direction, radians) {
    if (!ab_normal) {
        ab_normal = [0, 0, 1]; // Default is the XY plane
    }
    if (!a_direction) {
        // x cannot be used as the a direction if the normal points along it
        const normalAlongX = mjs.norm(utils.cross(ab_normal, [1, 0, 0])) < 1e-5;
        a_direction = normalAlongX ? [0, 0, 1] : [1, 0, 0];
    }

    // Local right-handed frame: Z on the normal, X on the in-plane part of a
    const aHat = utils.unit(a_direction);
    const zAxis = utils.unit(ab_normal);
    const aInPlane = mjs.subtract(aHat,
                                  mjs.dotMultiply(mjs.dot(aHat, zAxis), zAxis));
    const xAxis = utils.unit(aInPlane);
    const yAxis = utils.cross(zAxis, xAxis);

    const len = cellpar[0];
    const angles = radians ? cellpar[1] : cellpar[1].map(utils.degToRad);

    const cosines = [];
    const sines = [];
    angles.forEach((theta) => {
        let c = Math.cos(theta);
        let s = Math.sin(theta);
        // Snap right angles so orthorhombic cells come out exact
        if (Math.abs(Math.abs(s) - 1) < 1e-14) {
            s = Math.sign(s);
            c = 0.0;
        }
        cosines.push(c);
        sines.push(s);
    });

    // Cell vectors in the local frame
    const va = [len[0], 0, 0];
    const vb = [len[1] * cosines[2], len[1] * sines[2], 0];
    const cx = len[2] * cosines[1];
    const cy = len[2] * (cosines[0] - cosines[1] * cosines[2]) / sines[2];
    // The third component follows from the required length of c
    const cz = Math.sqrt(len[2] * len[2] - cx * cx - cy * cy);

    // Map from the local frame to the global Cartesian one
    return mjs.multiply([va, vb, [cx, cy, cz]], [xAxis, yAxis, zAxis]);
}
module.exports.cellparToCell = cellparToCell;
/**
 * A class holding a crystallographic structure. Inspired by the Atoms class
 * of the Atomic Simulation Environment.
 * @class
 * @param {Array}  elems      Array of element symbols or atomic numbers
 * @param {Array}  positions  Array of 3D positions, one per element
 * @param {Array}  cell       Array describing the unit cell. Can be any of
 *                            the following:
 *                            - false/null: no periodic boundary
 *                            - Single float: cubic cell with that parameter
 *                            - Array of 3 floats: orthorhombic cell with
 *                            those parameters
 *                            - Array of 3x3 floats: full Cartesian cell
 *                            definition
 *                            - Any of the previous two, but with one or two
 *                            elements replaced by false/null: partial
 *                            periodicity
 *                            - Array of 2x3 floats: cell in lengths+angles
 *                            format. Angles must be in degrees
 * @param {Object} info       Additional data attached to the structure
 *                            (defaults to an empty object)
 * @param {bool}   scaled     If true, interpret the positions as scaled
 *                            instead of absolute
 * @param {bool}   tolerant   If true, accept even symbols of elements that
 *                            do not exist in the periodic table; they are
 *                            stored with atomic number -1
 * @throws {Error} On an unknown element (always for atomic numbers, for
 *                 symbols unless tolerant), a malformed cell, a positions
 *                 array of the wrong shape, or scaled positions without a
 *                 fully periodic cell
 */
var Atoms = function(elems, positions, cell, info, scaled, tolerant) {
    // Resolve every element to a (symbol, atomic number) pair
    const symbols = [];
    const numbers = [];
    for (let i = 0; i < elems.length; ++i) {
        const el = elems[i];
        const is_num = (typeof(el) == 'number');
        let a = is_num ? mndtable.getAtomic(el) : mndtable.getElement(el);
        // Loose comparison: a failed lookup reported as undefined is treated
        // exactly like one reported as null
        if (a == null) {
            if (is_num || !tolerant) {
                throw Error('Non-existing element "' + el + '" passed to Atoms');
            }
            a = {
                'symbol': el, // Default for unknown element symbols
                'number': -1
            };
        }
        symbols.push(a.symbol);
        numbers.push(a.number);
    }
    this._arrays = {
        'symbols': symbols,
        'numbers': numbers,
    };
    this._N = symbols.length;

    // Null-safe "has exactly three entries" test, so that malformed input
    // reaches the explicit errors below instead of crashing on .length
    const has_three = function(v) {
        return v != null && v.length == 3;
    };

    // Now on to storing the cell
    this._pbc = [true, true, true];
    this._inv_cell = null;
    if (!cell) {
        this._pbc = [false, false, false];
        this._cell = null;
    } else if (typeof(cell) == 'number') {
        this._cell = [
            [cell, 0, 0],
            [0, cell, 0],
            [0, 0, cell]
        ];
    } else if (cell.length == 2 && has_three(cell[0]) && has_three(cell[1])) {
        // Lengths + angles format
        this._cell = cellparToCell(cell);
    } else if (cell.length != 3) {
        throw Error('Invalid cell passed to set_cell');
    } else {
        this._cell = [];
        for (let i = 0; i < 3; ++i) {
            const row = cell[i];
            if (!row) {
                // Missing axis: no periodicity along it
                this._cell.push(null);
                this._pbc[i] = false;
            } else if (typeof(row) == 'number') {
                const axis = [0, 0, 0];
                axis[i] = row;
                this._cell.push(axis);
            } else if (row.length != 3) {
                throw Error('Invalid cell passed to set_cell');
            } else {
                // Copy, so later changes to the caller's array cannot put
                // the stored cell out of step with its cached inverse
                this._cell.push(Array.from(row));
            }
        }
    }
    // An inverse only exists when all three axes are defined
    if (cell && !(this._cell.includes(null))) {
        this._inv_cell = mjs.inv(this._cell);
    }

    // Check that the positions are the right size
    let check_pos = (positions.length == this._N);
    for (let i = 0; i < positions.length; ++i) {
        check_pos = check_pos && has_three(positions[i]);
    }
    if (!check_pos) {
        throw Error('Invalid positions array passed to Atoms');
    }
    if (scaled) {
        if (this._inv_cell === null) {
            // Periodicity isn't full!
            throw Error('Impossible to use scaled coordinates with non-periodic system');
        }
        positions = mjs.multiply(positions, this._cell);
    }
    this.set_array('positions', positions);
    this.info = info || {};
};
// Prototype for Atoms. We focus on getters, not setters, because it's not
// meant to be modified besides its arrays. The named getters hand out copies
// made with utils.deepClone; get_array returns the stored array itself.
Atoms.prototype = {
    /** @return {number} Number of atoms in the structure */
    length: function() {
        return this._N;
    },
    /**
     * Store a per-atom array under the given name.
     * @param {string} name  Key of the array
     * @param {Array}  arr   Array with exactly one entry per atom
     * @throws {Error} If arr does not have one entry per atom
     */
    set_array: function(name, arr) {
        // Check that it's the right shape
        if (arr.length != this._N) {
            throw Error('Invalid array size');
        }
        this._arrays[name] = arr;
    },
    /**
     * @param  {string} name  Key of the array
     * @return {Array}        The stored array (not a copy), or undefined if
     *                        nothing was stored under that name
     */
    get_array: function(name) {
        return this._arrays[name];
    },
    get_chemical_symbols: function() {
        return utils.deepClone(this.get_array('symbols'));
    },
    get_atomic_numbers: function() {
        return utils.deepClone(this.get_array('numbers'));
    },
    get_cell: function() {
        return utils.deepClone(this._cell);
    },
    get_inv_cell: function() {
        return utils.deepClone(this._inv_cell);
    },
    get_pbc: function() {
        return utils.deepClone(this._pbc);
    },
    get_positions: function() {
        return utils.deepClone(this.get_array('positions'));
    },
    /**
     * Positions expressed in units of the cell vectors (position times
     * inverse cell).
     * @return {Array} Newly built array of scaled positions
     * @throws {Error} If the structure has no inverse cell, i.e. it is not
     *                 periodic along all three axes
     */
    get_scaled_positions: function() {
        const ic = this._inv_cell;
        if (ic == null) {
            // Fail with a clear message instead of a TypeError on ic[0]
            throw Error('Impossible to compute scaled positions for a non-periodic system');
        }
        const pos = this.get_array('positions');
        const spos = [];
        // Going the long way about this because it's performance critical
        for (let i = 0; i < pos.length; ++i) {
            const p = pos[i];
            spos.push([
                p[0] * ic[0][0] + p[1] * ic[1][0] + p[2] * ic[2][0],
                p[0] * ic[0][1] + p[1] * ic[1][1] + p[2] * ic[2][1],
                p[0] * ic[0][2] + p[1] * ic[1][2] + p[2] * ic[2][2]
            ]);
        }
        return spos;
    }
};
// Utility functions needed for CIF parsing. Not meant for outside use!
/**
 * Pull a list of tags out of a CIF block as parallel columns.
 *
 * The FIRST tag is mandatory: if the block lacks it, null is returned. Every
 * other tag yields null in its slot when it is absent, when its type differs
 * from the first tag's, or (for loops) when its length differs from the first
 * tag's. Loop values are returned as they are; a non-loop value is wrapped in
 * a one-element array.
 */
function _extract_tags(cblock, tags) {
    const found = tags.map((tag) => cblock[tag]);
    const first = found[0];
    if (first === undefined) {
        return null;
    }
    // The first tag fixes the type and, for loops, the length to match
    const looped = (first.type == 'loop');
    const expected = looped ? first.value.length : null;
    return found.map((entry) => {
        if (entry === undefined || entry.type != first.type) {
            return null;
        }
        if (!looped) {
            return [entry.value];
        }
        return (entry.value.length != expected) ? null : entry.value;
    });
}
/**
 * Collect the atom type table of a CIF block.
 *
 * Returns an object keyed by the text of each _atom_type_symbol entry; each
 * value holds the 'description' and 'radius_bond' properties that the block
 * provides for that type. Returns null when the block has no
 * _atom_type_symbol tag.
 */
function _atom_types(cblock) {
    const TAGS = ['_atom_type_symbol',
        '_atom_type_description',
        '_atom_type_radius_bond'
    ];
    const columns = _extract_tags(cblock, TAGS);
    if (!columns) {
        return null; // No types found
    }
    const table = {};
    columns[0].forEach((symToken, row) => {
        const sym = symToken.text;
        const props = {};
        table[sym] = props;
        for (let col = 1; col < TAGS.length; ++col) {
            if (columns[col] != null) {
                // slice(11) strips the '_atom_type_' prefix from the tag
                props[TAGS[col].slice(11)] = columns[col][row].get_value();
            }
        }
    });
    return table;
}
/**
 * Collect the atomic sites of a CIF block.
 *
 * Returns one object per _atom_site_label entry. Each object carries, under
 * the tag name minus its '_atom_site_' prefix (label, type_symbol, Cartn_x,
 * fract_x, ...), the value of every site tag that the block provides with a
 * matching type and length. Returns null when the block has no
 * _atom_site_label tag.
 */
function _atom_sites(cblock) {
    const TAGS = ['_atom_site_label',
        '_atom_site_type_symbol',
        '_atom_site_Cartn_x',
        '_atom_site_Cartn_y',
        '_atom_site_Cartn_z',
        '_atom_site_fract_x',
        '_atom_site_fract_y',
        '_atom_site_fract_z'
    ];
    const columns = _extract_tags(cblock, TAGS);
    if (!columns) {
        return null; // No sites found
    }
    return columns[0].map((unused, row) => {
        const site = {};
        TAGS.forEach((tag, col) => {
            if (columns[col] != null) {
                // slice(11) strips the '_atom_site_' prefix from the tag
                site[tag.slice(11)] = columns[col][row].get_value();
            }
        });
        return site;
    });
}
/**
 * Read the six cell parameters of a CIF block.
 *
 * Returns [[a, b, c], [alpha, beta, gamma]] built from the values of the
 * _cell_length_* and _cell_angle_* tags, or null when any of the six tags is
 * missing or any of the three lengths equals zero.
 */
function _cellpars(cblock) {
    const TAGS = [
        '_cell_length_a',
        '_cell_length_b',
        '_cell_length_c',
        '_cell_angle_alpha',
        '_cell_angle_beta',
        '_cell_angle_gamma'
    ];
    const values = [];
    for (let i = 0; i < TAGS.length; ++i) {
        const entry = cblock[TAGS[i]];
        if (entry === undefined) {
            return null;
        }
        values.push(entry.value.get_value());
    }
    const lengths = values.slice(0, 3);
    const angles = values.slice(3);
    // If any of a, b, c is zero, then it's invalid
    for (let k = 0; k < lengths.length; ++k) {
        if (lengths[k] == 0) {
            return null;
        }
    }
    return [lengths, angles];
}
/**
 * Find the symmetry operations declared in a CIF block.
 *
 * An explicit operator list (_space_group_symop_operation_xyz, or else
 * _symmetry_equiv_pos_as_xyz) takes precedence; each entry's text is parsed
 * with symmetry.parseSymOp. Failing that, a Hall symbol
 * (_space_group_name_Hall, or else _symmetry_space_group_name_Hall) is handed
 * to symmetry.interpretHallSymbol.
 *
 * @return {Array|null} The operations, or null when the block declares none
 *                      or declares a single operator (taken to be identity)
 */
function _symops(cblock) {
    // Look for either tag
    const symopvals = (cblock._space_group_symop_operation_xyz ||
        cblock._symmetry_equiv_pos_as_xyz);
    // Hall symbol?
    const hallsymbol = (cblock._space_group_name_Hall ||
        cblock._symmetry_space_group_name_Hall);

    if (symopvals) {
        if (symopvals.type == 'single' ||
            symopvals.value.length == 1) {
            // Either way is pointless (only one value means identity)
            return null;
        }
        // Parse every listed operator: nothing guarantees that the identity
        // comes first, so dropping the first entry could lose a real
        // operation. A listed identity only reproduces the original site.
        return symopvals.value.map((op) => symmetry.parseSymOp(op.text));
    }
    if (hallsymbol) {
        // NOTE(review): this passes the tag entry itself, not its text;
        // confirm that interpretHallSymbol expects that.
        return symmetry.interpretHallSymbol(hallsymbol);
    }
    return null;
}
/** CIF to Atoms parser
 *
 * Every data block that contains an _atom_site_label tag is turned into an
 * Atoms object. Cartesian site coordinates are used when all three are
 * present; otherwise the fractional ones are converted with the block's cell.
 * When the block has a cell and declares symmetry operations, each site is
 * replicated by them and copies that coincide (within symtol) with an
 * already generated copy of the same site are dropped. The site labels are
 * stored in the 'labels' array of the resulting structure.
 *
 * @static
 * @param  {string} cif     CIF file in text format
 * @param  {float}  symtol  Distance under which two symmetric copies are
 *                          considered equivalent. It is compared with
 *                          utils.shortestPeriodicLength of the difference
 *                          between fractional coordinates. Defaults to 1e-3
 *
 * @return {Object}         Dictionary of parsed crystal structures, keyed by
 *                          data block name
 * @throws {Error}          If a site lacks Cartesian coordinates and the
 *                          block defines no usable cell
 */
Atoms.readCif = function(cif, symtol=1e-3) {
    const cifdict = parseCif(cif);
    const structs = {};
    for (const sname in cifdict) {
        const cblock = cifdict[sname];
        // Only data blocks listing atomic sites describe a structure
        if (!('_atom_site_label' in cblock)) {
            continue;
        }
        const asites = _atom_sites(cblock);
        const cellpars = _cellpars(cblock);
        // Is this system periodic?
        const pbc = (cellpars !== null);
        // Scoped to this block on purpose: a non-periodic block must not
        // inherit the cell of a periodic block parsed before it.
        const cell = pbc ? cellparToCell(cellpars) : null;

        // Extract symbols, labels, and positions
        let symbols = [];
        let labels = [];
        let positions = [];
        for (let i = 0; i < asites.length; ++i) {
            const site = asites[i];
            symbols.push(site.type_symbol);
            labels.push(site.label);
            let p = [site.Cartn_x, site.Cartn_y, site.Cartn_z];
            if (p.some((x) => x === undefined)) {
                // Then we need the fractional ones!
                if (!pbc) {
                    // Does not make sense...
                    throw Error('Absolute coordinates are necessary without a unit cell');
                }
                p = mjs.multiply([site.fract_x, site.fract_y, site.fract_z], cell);
            }
            positions.push(p);
        }

        // Now for symmetry operations, which only apply to periodic systems
        const symops = pbc ? _symops(cblock) : null;
        if (symops) {
            const fpos = mjs.multiply(positions, mjs.inv(cell));
            const allfpos = [];
            const allsyms = [];
            const alllabs = [];
            for (let i = 0; i < fpos.length; ++i) {
                const p0 = fpos[i];
                // Distinct images of this site found so far
                const allp = [p0];
                for (let j = 0; j < symops.length; ++j) {
                    const rot = symops[j][0];
                    const tr = symops[j][1];
                    const p = utils.mod1(mjs.add(mjs.multiply(rot, p0), tr));
                    // Is it equivalent to something else?
                    const eq = allp.some((q) => {
                        const r = utils.mod1(mjs.subtract(p, q));
                        return utils.shortestPeriodicLength(r) < symtol;
                    });
                    if (!eq) {
                        allp.push(p);
                    }
                }
                // Every image carries the symbol and label of its parent site
                for (const q of allp) {
                    allfpos.push(q);
                    allsyms.push(symbols[i]);
                    alllabs.push(labels[i]);
                }
            }
            symbols = allsyms;
            labels = alllabs;
            positions = mjs.multiply(allfpos, cell);
        }

        // Create the atoms object
        const a = new Atoms(symbols, positions, cell, {});
        a.set_array('labels', labels);
        structs[sname] = a;
    }
    return structs;
};
module.exports.Atoms = Atoms;