sdf-parser
Version:
114 lines • 3.98 kB
JavaScript
import { ensureString } from 'ensure-string';
import { getEntriesBoundaries } from "./getEntriesBoundaries.js";
import { getMolecule } from "./util/getMolecule.js";
/**
 * Synchronously parse an SDF file into an array of molecule objects.
 *
 * Entry boundaries are obtained from `getEntriesBoundaries` (called with
 * `eol + '$$$$'` as delimiter), every entry of at least 40 characters is
 * handed to `getMolecule`, and per-label statistics are accumulated for the
 * molecules that are kept.
 * @param sdf - The SDF content as a string, `ArrayBuffer`, or `ArrayBufferView`.
 * @param options - Parsing options. The object passed by the caller is copied, never modified.
 * @param options.eol - End-of-line sequence. When `undefined`, `'\r\n'` is used
 *   if it occurs in the first 1000 characters and `mixedEOL` is falsy; otherwise `'\n'`.
 * @param options.mixedEOL - When truthy, every `'\r\n'` is replaced by `'\n'` before parsing.
 * @param options.dynamicTyping - Forwarded to `getMolecule`; defaults to `true`.
 * @param options.modifiers - Forwarded to `getMolecule`; defaults to `{}`.
 * @param options.forEach - Forwarded to `getMolecule`; defaults to `{}`.
 * @param options.include - Forwarded to `getMolecule` unchanged.
 * @param options.exclude - Forwarded to `getMolecule` unchanged.
 * @param options.filter - Optional predicate; a molecule is kept only when it returns a truthy value.
 * @returns A {@link ParseResult} containing molecules and statistics.
 * @throws {TypeError} If `ensureString` does not yield a string for `sdf`.
 * @example
 * ```ts
 * import { readFileSync } from 'node:fs';
 * import { parse } from 'sdf-parser';
 *
 * const sdf = readFileSync('compounds.sdf', 'utf8');
 * const { molecules, statistics } = parse(sdf);
 * ```
 */
export function parse(sdf, options = {}) {
  // Work on a shallow copy so the caller's options object is left untouched.
  const settings = { ...options };
  const {
    modifiers = {},
    forEach: forEachMap = {},
    dynamicTyping = true,
    mixedEOL,
  } = settings;

  // ensureString converts ArrayBuffer/ArrayBufferView to string
  const sdfString = ensureString(sdf);
  if (typeof sdfString !== 'string') {
    throw new TypeError('Parameter "sdf" must be a string');
  }

  let { eol } = settings;
  if (eol === undefined) {
    // Only the head of the content is inspected to keep detection cheap.
    const hasCRLF = sdfString.slice(0, 1000).includes('\r\n');
    eol = !mixedEOL && hasCRLF ? '\r\n' : '\n';
  }

  const workingSdf = mixedEOL
    ? sdfString.replaceAll('\r\n', '\n')
    : sdfString;

  const entriesBoundaries = getEntriesBoundaries(workingSdf, `${eol}$$$$`, eol);
  const molecules = [];
  // Label names come from the parsed file. A prototype-less dictionary
  // guarantees that a label such as "constructor" or "__proto__" is a plain
  // key and never resolves to an inherited Object.prototype member.
  const labels = Object.create(null);
  const start = Date.now();

  for (const boundary of entriesBoundaries) {
    const sdfPart = workingSdf.slice(...boundary);
    // Entries shorter than 40 characters are ignored.
    if (sdfPart.length < 40) {
      continue;
    }
    // Expected to be filled by getMolecule with the labels met in this entry.
    const currentLabels = [];
    const molecule = getMolecule(sdfPart, labels, currentLabels, {
      eol,
      dynamicTyping,
      modifiers,
      forEach: forEachMap,
      include: settings.include,
      exclude: settings.exclude,
    });
    if (!molecule) {
      continue;
    }
    if (settings.filter && !settings.filter(molecule)) {
      continue;
    }
    molecules.push(molecule);
    // `counter` is compared with `molecules.length` below, so a label listed
    // several times for the same molecule must be counted only once.
    for (const label of new Set(currentLabels)) {
      labels[label].counter++;
    }
  }

  // Convert all numeric fields and compute min/max
  for (const label in labels) {
    const info = labels[label];
    if (!info.isNumeric) {
      continue;
    }
    info.minValue = Infinity;
    info.maxValue = -Infinity;
    for (const molecule of molecules) {
      const raw = molecule[label];
      // Falsy values (undefined, '', ...) mean "no value", except a numeric 0
      // which is a legitimate value and must take part in min/max.
      if (!raw && raw !== 0) {
        continue;
      }
      const value = Number.parseFloat(raw);
      molecule[label] = value;
      if (value > info.maxValue) {
        info.maxValue = value;
      }
      if (value < info.minValue) {
        info.minValue = value;
      }
    }
  }

  const labelNames = Object.keys(labels);
  const statistics = labelNames.map((label) => {
    const info = labels[label];
    return {
      label,
      counter: info.counter,
      isNumeric: info.isNumeric,
      keep: info.keep,
      minValue: info.minValue,
      maxValue: info.maxValue,
      // True when every kept molecule carries this label.
      always: info.counter === molecules.length,
    };
  });

  return {
    time: Date.now() - start,
    molecules,
    labels: labelNames,
    statistics,
  };
}
//# sourceMappingURL=parse.js.map