UNPKG

sdf-parser

Version:
114 lines 3.98 kB
import { ensureString } from 'ensure-string';
import { getEntriesBoundaries } from "./getEntriesBoundaries.js";
import { getMolecule } from "./util/getMolecule.js";

/**
 * Synchronously parse an SDF file into an array of molecule objects.
 *
 * The input is split into entries on the `$$$$` delimiter line, each entry is
 * handed to `getMolecule`, and per-label statistics are collected for the
 * molecules that are kept.
 * @param sdf - The SDF content as a string, `ArrayBuffer`, or `ArrayBufferView`.
 * @param options - Parsing options. The object passed by the caller is not mutated.
 * @param options.eol - End-of-line sequence. When omitted, `'\r\n'` is used if it
 *   occurs in the first 1000 characters (and `mixedEOL` is not set), otherwise `'\n'`.
 * @param options.mixedEOL - When truthy, every `'\r\n'` in the input is replaced by
 *   `'\n'` before parsing; an `eol` of `'\r\n'` is then treated as `'\n'`.
 * @param options.dynamicTyping - Forwarded to `getMolecule`; defaults to `true`.
 * @param options.modifiers - Forwarded to `getMolecule`; defaults to `{}`.
 * @param options.forEach - Forwarded to `getMolecule`; defaults to `{}`.
 * @param options.include - Forwarded to `getMolecule` unchanged.
 * @param options.exclude - Forwarded to `getMolecule` unchanged.
 * @param options.filter - Optional predicate called with each molecule; the
 *   molecule is kept only when the predicate returns a truthy value.
 * @returns A {@link ParseResult} containing `molecules`, the list of `labels`,
 *   per-label `statistics`, and `time` (milliseconds elapsed, measured from
 *   after the entry boundaries have been computed).
 * @throws {TypeError} If `ensureString` does not yield a string for `sdf`.
 * @example
 * ```ts
 * import { readFileSync } from 'node:fs';
 * import { parse } from 'sdf-parser';
 *
 * const sdf = readFileSync('compounds.sdf', 'utf8');
 * const { molecules, statistics } = parse(sdf);
 * ```
 */
export function parse(sdf, options = {}) {
  // Work on a shallow copy so the caller's options object is left untouched.
  const opts = { ...options };
  const {
    modifiers = {},
    forEach: forEachMap = {},
    dynamicTyping = true,
  } = opts;

  // ensureString converts ArrayBuffer/ArrayBufferView to string
  const sdfString = ensureString(sdf);
  if (typeof sdfString !== 'string') {
    throw new TypeError('Parameter "sdf" must be a string');
  }

  let eol = opts.eol;
  if (eol === undefined) {
    // Only the head of the file is inspected to pick the line ending.
    const hasCRLF = sdfString.slice(0, 1000).includes('\r\n');
    eol = !opts.mixedEOL && hasCRLF ? '\r\n' : '\n';
  }

  let workingSdf = sdfString;
  if (opts.mixedEOL) {
    workingSdf = sdfString.replaceAll('\r\n', '\n');
    // The text no longer uses '\r\n' as a line ending, so an explicitly
    // requested '\r\n' eol could not match the entry delimiter any more.
    if (eol === '\r\n') eol = '\n';
  }

  const entriesBoundaries = getEntriesBoundaries(
    workingSdf,
    `${eol}$$$$`,
    eol,
  );

  const molecules = [];
  // NOTE(review): `labels` is a plain object keyed by field names taken from
  // the input, so names such as `constructor` resolve to inherited members.
  // Consider a null-prototype object once getMolecule's access pattern is confirmed.
  const labels = {};
  const start = Date.now();

  for (const boundary of entriesBoundaries) {
    const sdfPart = workingSdf.slice(...boundary);
    // Entries shorter than 40 characters are skipped.
    if (sdfPart.length < 40) continue;

    // Filled by getMolecule with the labels found in this entry.
    const currentLabels = [];
    const molecule = getMolecule(sdfPart, labels, currentLabels, {
      eol,
      dynamicTyping,
      modifiers,
      forEach: forEachMap,
      include: opts.include,
      exclude: opts.exclude,
    });
    if (!molecule) continue;
    if (opts.filter && !opts.filter(molecule)) continue;

    molecules.push(molecule);
    // Only molecules that are kept contribute to the label counters.
    for (const label of currentLabels) {
      labels[label].counter++;
    }
  }

  // Convert all numeric fields and compute min/max
  for (const [label, info] of Object.entries(labels)) {
    if (!info.isNumeric) continue;
    let minValue = Infinity;
    let maxValue = -Infinity;
    for (const molecule of molecules) {
      // Missing or empty values are left as they are.
      if (!molecule[label]) continue;
      const value = Number.parseFloat(molecule[label]);
      molecule[label] = value;
      // Plain comparisons (not Math.min/max) so a NaN never becomes a bound.
      if (value > maxValue) maxValue = value;
      if (value < minValue) minValue = value;
    }
    info.minValue = minValue;
    info.maxValue = maxValue;
  }

  const statistics = [];
  for (const [label, info] of Object.entries(labels)) {
    // A label is "always" present when every kept molecule carries it.
    info.always = info.counter === molecules.length;
    statistics.push({
      label,
      counter: info.counter,
      isNumeric: info.isNumeric,
      keep: info.keep,
      minValue: info.minValue,
      maxValue: info.maxValue,
      always: info.always,
    });
  }

  return {
    time: Date.now() - start,
    molecules,
    labels: Object.keys(labels),
    statistics,
  };
}
//# sourceMappingURL=parse.js.map