UNPKG

nmr-learning

Version:

Learn a database of chemical shift and coupling constants assignments

165 lines (144 loc) 5.58 kB
const FS = require('fs'); const path = require('path'); const OCLE = require('openchemlib-extended'); const predictor = require('nmr-predictor'); const autoassigner = require('../../nmr-auto-assignment/src/index'); // const cheminfo = require('./preprocess/cheminfo'); // const maybridge = require('./preprocess/maybridge'); const compilePredictionTable = require('./compilePredictionTable'); const stats = require('./stats'); function loadFile(filename) { return FS.readFileSync(path.join(__dirname, filename)).toString(); } async function start() { var maxIterations = 15; // Set the number of interations for training var ignoreLabile = true;// Set the use of labile protons during training var learningRatio = 0.8; // A number between 0 and 1 const levels = [6, 5, 4, 3, 3]; var testSet = JSON.parse(loadFile('/../data/assigned298.json'));// File.parse("/data/nmrsignal298.json");//"/Research/NMR/AutoAssign/data/cobasSimulated"; var dataset1 = JSON.parse(FS.readFileSync('/home/acastillo/Documents/data/procjson/cheminfo443_y.json').toString()); var dataset2 = JSON.parse(FS.readFileSync('/home/acastillo/Documents/data/procjson/maybridge_y.json').toString()); var dataset3 = [];// JSON.parse(FS.readFileSync('/home/acastillo/Documents/data/procjson/big0.json').toString()); // dataset3.splice(0, 500) var datasets = [dataset1, dataset2, dataset3]; var start, date; var prevError = 0; var prevCont = 0; var dataset, max, ds, i, j, k, nAtoms; var result, solutions; // var fastDB = []; var fastDB = JSON.parse(loadFile('/../data/h_15.json')); console.log(`Cheminfo All: ${dataset1.length}`); console.log(`MayBridge All: ${dataset2.length}`); console.log(`Other All: ${dataset3.length}`); // Remove the overlap molecules from train and test var removed = 0; var trainDataset = []; for (i = 0; i < testSet.length; i++) { for (ds = 0; ds < datasets.length; ds++) { dataset = datasets[ds]; for (j = dataset.length - 1; j >= 0; j--) { if (dataset[j].general.ocl.hasLabile || testSet[i].diaID === dataset[j].general.ocl.id) { // if (testSet[i].diaID === dataset[j].general.ocl.id) { dataset.splice(j, 1); removed++; break; } } } } if (start === 0) { start += removed; } for (ds = 0; ds < datasets.length; ds++) { dataset = datasets[ds]; for (j = 0; j < dataset.length; j++) { trainDataset.push(dataset[j]); } } console.log(`Cheminfo Final: ${dataset1.length}`); console.log(`MayBridge Final: ${dataset2.length}`); console.log(`Other Final: ${dataset3.length}`); console.log(`Overlaped molecules: ${removed}. They were removed from training datasets`); // Run the learning process. After each iteration the system has seen every single molecule once // We have to use another stop criteria like convergence var iteration = 0; maxIterations = 10; var convergence = false; try { for (let level of levels) { date = new Date(); start = date.getTime(); var count = 0; dataset = trainDataset;// datasets[ds]; max = dataset.length; predictor.setDb(fastDB, 'proton', 'proton'); // we could now loop on the sdf to add the int index for (i = 0; i < max; i++) { let predictions = await predictor.proton(dataset[i].general.ocl, { condensed: true, OCLE: OCLE, levels: [level], hose: true, use: 'median', ignoreLabile: ignoreLabile, keepMolecule: true }); // console.log(i) let ranges = dataset[i].spectra.nmr[0].range; for (let k = predictions.length - 1; k >= 0; k--) { let pred = predictions[k]; let hose5 = pred.hose[level - 1]; if (pred.ncs > 4 && fastDB[level - 1][hose5]) { let found = false; for (let range of ranges) { if (Math.abs((range.from + range.to) - (pred.min + pred.max)) < (Math.abs(range.from - range.to) + Math.abs(pred.min - pred.max))) { if (!fastDB[level - 1][hose5].p) { fastDB[level - 1][hose5].p = 1; } else { fastDB[level - 1][hose5].p++; } found = true; break; } } // console.log(hose5) if (!found) { if (!fastDB[level - 1][hose5].n) { fastDB[level - 1][hose5].n = 1; } else { fastDB[level - 1][hose5].n++; } } } else { predictions.splice(k, 1); } } } let keys = Object.keys(fastDB[level - 1]); var deleted = 0; keys.forEach((key) => { let confidence = 0; if (fastDB[level - 1][key].p) { confidence = 1; if (fastDB[level - 1][key].n) { confidence = fastDB[level - 1][key].p / (fastDB[level - 1][key].p + fastDB[level - 1][key].n); } } fastDB[level - 1][key].conf = confidence; if (confidence > 0 && confidence < 0.2) { console.log(`${key}:${JSON.stringify(fastDB[level - 1][key])}`); delete fastDB[level - 1][key]; deleted++; // console.log(fastDB[4][key]); } }); console.log(`Deleted at ${level}:${deleted}`); } FS.writeFileSync(`${__dirname}/../data/h_clean.json`, JSON.stringify(fastDB)); } catch (e) { console.log(e); } console.log('Done'); } start();