// nmr-learning
// Version:
// Learn a database of chemical-shift and coupling-constant assignments.
// 300 lines (272 loc) • 8.37 kB
// JavaScript
const histogram = require('./histogram');
/**
 * Measures how far the shifts in `A` are from the shifts in `B`.
 *
 * Entries are paired on their first diastereotopic ID (`diaIDs[0]`). Both
 * lists are walked from the last element to the first, so when `B` holds
 * several entries with the same ID the one closest to the end is used.
 * A pair only contributes when both sides define `delta`.
 *
 * @param {Array<object>} A - Entries exposing `diaIDs` and (optionally) `delta`.
 * @param {Array<object>} B - Entries exposing `diaIDs` and (optionally) `delta`.
 * @param {Array<number>} hist - Receives every absolute difference, in visiting order.
 * @returns {{error: number, count: number, min: number, max: number}}
 *   Mean absolute difference, number of contributing pairs and the extreme
 *   differences; all zeros when no pair contributed.
 */
function compare(A, B, hist) {
  let total = 0;
  let matched = 0;
  let largest = 0;
  let smallest = 9999999;

  let a = A.length;
  while (a-- > 0) {
    let b = B.length;
    while (b-- > 0) {
      if (A[a].diaIDs[0] !== B[b].diaIDs[0]) {
        continue;
      }
      const left = A[a].delta;
      const right = B[b].delta;
      if (left !== undefined && right !== undefined) {
        const diff = Math.abs(left - right);
        hist.push(diff);
        total += diff;
        matched += 1;
        // Plain comparisons (not Math.max/min) so a NaN difference leaves the extremes alone.
        if (diff > largest) {
          largest = diff;
        }
        if (diff < smallest) {
          smallest = diff;
        }
      }
      // Only the first hit of the reverse scan is considered.
      break;
    }
  }

  if (matched === 0) {
    return { error: 0, count: 0, min: 0, max: 0 };
  }
  return { error: total / matched, count: matched, min: smallest, max: largest };
}
/**
 * Copies the shift found in `B` onto the matching entry of `A` as `delta2`
 * and drops every entry of `A` that ends up without one.
 *
 * Matching uses the first diastereotopic ID (`diaIDs[0]`); `B` is scanned
 * from its end and only the first hit is used. `delta2` is filled in only
 * when both entries define `delta`. `A` is modified in place.
 *
 * @param {Array<object>} A - Entries to annotate and filter (mutated).
 * @param {Array<object>} B - Entries providing the `delta` values to copy.
 * @returns {Array<object>} The same array instance as `A`.
 */
function addObserved(A, B) {
  // Walk backwards so removing an element never shifts one still to be visited.
  let idx = A.length;
  while (idx-- > 0) {
    A[idx].delta2 = null;

    let k = B.length;
    while (k-- > 0) {
      if (A[idx].diaIDs[0] !== B[k].diaIDs[0]) {
        continue;
      }
      if (A[idx].delta !== undefined && B[k].delta !== undefined) {
        A[idx].delta2 = B[k].delta;
      }
      break;
    }

    const observed = A[idx].delta2;
    if (observed === null || observed === undefined) {
      A.splice(idx, 1);
    }
  }
  return A;
}
/*
function countByLevel(A, result) {
var i, j;
for (i = A.length - 1; i >= 0; i--) {
result[A[i].level]++;
}
}*/
/**
 * Runs the predictor on every entry of `dataSet`, counts how many predictions
 * were produced at each `level`, and pairs the predictions with the entry's
 * assignment through `addObserved`.
 *
 * Entries without a `molecule` get one built from `molfile` with `options.ACT`
 * and cached back on the entry.
 *
 * @param {Array<object>} dataSet - Entries with `molecule` or `molfile`, plus `assignment`.
 * @param {Function} nmrShiftDBPred1H - Called as `(molecule, predictorOptions)`; must return an array.
 * @param {object} options - Reads `ACT`, `db`, `iterationQuery`, `ignoreLabile` and `hoseLevels`.
 * @returns {{hoseStats: Array<number>, predictions: Array}} `hoseStats[k]` is the
 *   number of predictions whose `level` is `k`. The array has at least six
 *   slots and grows when a higher level is met. `predictions[i]` is the
 *   filtered prediction list for `dataSet[i]`.
 */
function hoseStats(dataSet, nmrShiftDBPred1H, options) {
  const { ACT, db, iterationQuery, ignoreLabile, hoseLevels } = options;
  const counts = [0, 0, 0, 0, 0, 0];
  const predictions = new Array(dataSet.length);

  for (let i = 0; i < dataSet.length; i++) {
    const entry = dataSet[i];
    if (!entry.molecule) {
      // The stored molfile carries literal "\n" sequences; turn them into real line breaks.
      const loaded = ACT.load(entry.molfile.replace(/\\n/g, '\n'));
      loaded.expandHydrogens();
      entry.molecule = loaded;
    }

    const h1pred = nmrShiftDBPred1H(entry.molecule, {
      db,
      debug: true,
      iterationQuery,
      ignoreLabile,
      hoseLevels
    });

    for (const prediction of h1pred) {
      if (prediction.level == null) {
        continue;
      }
      const level = Number(prediction.level);
      if (!Number.isInteger(level) || level < 0) {
        continue;
      }
      // A fixed six-slot array turned any level >= 6 into NaN; grow it instead.
      while (counts.length <= level) {
        counts.push(0);
      }
      counts[level]++;
    }

    predictions[i] = addObserved(h1pred, entry.assignment);
  }

  return { hoseStats: counts, predictions };
}
/**
 * Builds the molecule description that `cmp2asg` caches on a data-set entry
 * and passes to the predictor.
 *
 * @param {object} OCLE - Library object providing `Molecule.fromIDCode` and
 *   `Util.getHoseCodesFromDiastereotopicID`.
 * @param {string} idCode - ID code the molecule is rebuilt from.
 * @param {Array<number>} levels - Sphere sizes; only `levels[0]` is used here.
 * @returns {{id: string, atom: object, diaId: Array<object>, nH: number}}
 */
function describeMoleculeForPrediction(OCLE, idCode, levels) {
  const molecule = OCLE.Molecule.fromIDCode(idCode);
  molecule.addImplicitHydrogens();

  // NOTE(review): the pattern needs at least one digit after "H". A formula
  // without such a group is left untouched by replace() and `* 1` then gives
  // NaN - confirm that single-hydrogen formulas cannot reach this point.
  const nH = molecule.getMolecularFormula().formula.replace(/.*H([0-9]+).*/, '$1') * 1;

  const diaIDs = molecule.getGroupedDiastereotopicAtomIDs();
  // Descending atom label; within one label, larger counter first.
  diaIDs.sort((a, b) => {
    if (a.atomLabel === b.atomLabel) {
      return b.counter - a.counter;
    }
    return a.atomLabel < b.atomLabel ? 1 : -1;
  });

  const directBonds = (fromLabel, toLabel) =>
    molecule.getAllPaths({ fromLabel, toLabel, minLength: 1, maxLength: 1 });
  const linksOH = directBonds('H', 'O');
  const linksNH = directBonds('H', 'N');
  // NOTE(review): the original query is Cl -> N although the variable is
  // named linksClH; kept unchanged - verify whether H -> Cl was intended.
  const linksClH = directBonds('Cl', 'N');

  // Every group that starts one of the paths above is flagged as labile.
  const labileIDs = new Set();
  for (const link of [...linksOH, ...linksNH, ...linksClH]) {
    labileIDs.add(link.fromDiaID);
  }

  const atoms = {};
  for (const diaId of diaIDs) {
    delete diaId._highlight;
    diaId.hose = OCLE.Util.getHoseCodesFromDiastereotopicID(diaId.oclID, {
      maxSphereSize: levels[0],
      type: 0
    });
    for (const atomID of diaId.atoms) {
      atoms[atomID] = diaId.oclID;
    }
    diaId.isLabile = labileIDs.has(diaId.oclID);
  }

  return { id: molecule.getIDCode(), atom: atoms, diaId: diaIDs, nH };
}

/**
 * Predicts the proton shifts of every entry in `dataSet` and compares them
 * with the entry's `assignment`.
 *
 * The molecule description is built once per entry and cached as `entry.ocl`.
 * Predictions are awaited one entry at a time, in data-set order.
 *
 * @param {Array<object>} dataSet - Entries with `diaID` (or a cached `ocl`) and `assignment`.
 * @param {object} predictor - Object whose `proton(molecule, opts)` resolves to the predictions.
 * @param {object} options - Reads `OCLE`, `levels`, `ignoreLabile` and the optional
 *   `histParams` (`{from, to, nBins}`, default `{from: 0, to: 1, nBins: 100}`).
 * @returns {Promise<{error: number, count: number, min: number, max: number, hist: *}>}
 *   `error` is the mean of the per-entry mean errors (0 for an empty data set),
 *   `count` the total number of compared shifts, `min`/`max` the extreme
 *   differences (0 when nothing was compared) and `hist` the value returned
 *   by `histogram` for all differences.
 */
async function cmp2asg(dataSet, predictor, options) {
  const { OCLE, levels, ignoreLabile } = options;
  const hist = [];
  let errorSum = 0;
  let count = 0;
  let min = Infinity;
  let max = 0;

  for (const entry of dataSet) {
    if (!entry.ocl) {
      entry.ocl = describeMoleculeForPrediction(OCLE, entry.diaID, levels);
    }
    const h1pred = await predictor.proton(entry.ocl, { ignoreLabile, levels });
    const result = compare(h1pred, entry.assignment, hist);

    errorSum += result.error;
    count += result.count;
    // compare() reports min = max = 0 when it matched nothing; letting that
    // through would pin the overall minimum to 0 for no real reason.
    if (result.count > 0) {
      if (result.min < min) {
        min = result.min;
      }
      if (result.max > max) {
        max = result.max;
      }
    }
  }

  const histParams = options.histParams || { from: 0, to: 1, nBins: 100 };
  return {
    // Guard the empty data set, which used to produce NaN.
    error: dataSet.length > 0 ? errorSum / dataSet.length : 0,
    count,
    min: Number.isFinite(min) ? min : 0,
    max,
    hist: histogram({
      data: hist,
      bins: linspace(histParams.from, histParams.to, histParams.nBins)
    })
  };
}
/*
function comparePredictors(dataSet, nmrShiftDBPred1H, options) {
//console.log(options);
//var db = new DB.MySQL("localhost","mynmrshiftdb3","nmrshiftdb","xxswagxx");
let ACT = options.ACT;
var other = 'h1';
var db = options.db;
var folder = options.dataset;
var avgError = 0;
var count = 0;
var min = 9999999;
var max = 0;
var spinus, molecule, diaIDs, h1pred, result;
var molecules = FS.dir(folder, {filter: '.mol'});//"/Research/NMR/AutoAssign/data/test"
var firstTime = false;
if (dataSet.length === 0) {
firstTime = true;
}
for (var i = 0; i < molecules.length; i++) {
//console.log(firstTime+" "+dataSet.length);
if (!firstTime) {
spinus = dataSet[i].spinus;
molecule = dataSet[i].molecule;
} else {
molecule = ACT.load(FS.load(molecules[i]));
molecule.expandHydrogens();
if (FS.exists(molecules[i].replace('.mol', '.' + other))) {
spinus = FS.loadJSON(molecules[i].replace('.mol', '.' + other));
} else {
diaIDs = molecule.getDiastereotopicAtomIDs('H');
spinus = SD.spinusPred1H(molecule.toMolfile(), {diaIDs: diaIDs});
//console.log('Saving...');
FS.save(molecules[i].replace('.mol', '.' + other), JSON.stringify(spinus));
}
dataSet.push({spinus: spinus, molecule: molecule});
}
if (spinus.length > 0) {
var hist = [];
h1pred = nmrShiftDBPred1H(molecule, {
db: db,
debug: false,
iterationQuery: options.iterationQuery,
ignoreLabile: options.ignoreLabile,
hoseLevels: options.hoseLevels
});
result = compare(h1pred, spinus, hist);
avgError += result.error;
count += result.count;
if (result.min < min) {
min = result.min;
}
if (result.max > max) {
max = result.max;
}
}
}
var histParams = options.histParams || {from: 0, to: 1, nBins: 100};
return {
error: avgError / dataSet.length, count: count, min: min, max: max, hist: histogram({
data: hist,
bins: linspace(histParams.from, histParams.to, histParams.nBins)
})
};
}*/
/**
 * Returns `n` evenly spaced values running from `a` to `b`, both included.
 *
 * When `n` is omitted it defaults to `round(b - a) + 1`, but never less
 * than 1. One point yields `[a]`; fewer than one yields an empty array.
 *
 * @param {number} a - First value.
 * @param {number} b - Last value.
 * @param {number} [n] - Number of points.
 * @returns {Array<number>} The generated values.
 */
function linspace(a, b, n) {
  let count = n;
  if (count === undefined) {
    count = Math.max(Math.round(b - a) + 1, 1);
  }
  if (count < 2) {
    return count === 1 ? [a] : [];
  }

  const points = Array(count);
  const last = count - 1;
  for (let k = 0; k <= last; k++) {
    // Weighted average of the end points; k = 0 gives a, k = last gives b.
    points[k] = (k * b + (last - k) * a) / last;
  }
  return points;
}
// Public API of this module. comparePredictors is not exported: its
// implementation is currently commented out in this file.
module.exports = { cmp2asg, hoseStats };