UNPKG

sdf-parser

Version:
115 lines (104 loc) 3.44 kB
import { parseString } from 'dynamic-typing';

import { MolfileStream } from './MolfileStream.ts';

/**
 * A molecule entry returned by the {@link iterator} async generator.
 * The `molfile` field contains the raw V2000/V3000 molfile block.
 * Additional fields are populated from the SDF `> <field>` sections.
 */
export interface IteratorMolecule {
  /** The raw V2000/V3000 molfile block. */
  molfile: string;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  [label: string]: any;
}

/**
 * Options for the {@link iterator} async generator.
 */
export interface IteratorOptions {
  /**
   * End-of-line character used to split field entries.
   * Only honoured when `mixedEOL` is `false`; in every other case the
   * field entries are split on `'\n'`.
   * @default '\n'
   */
  eol?: string;
  /**
   * Controls EOL normalisation before splitting on `$$$$`.
   * - `undefined` (default): inspect the first 10 000 characters; normalise only if `\r` is detected.
   * - `true`: always normalise CR and CRLF to LF.
   * - `false`: never normalise; use when the file is known to be pure LF.
   */
  mixedEOL?: boolean;
  /**
   * When `true`, numeric string values are automatically converted to numbers.
   * @default true
   */
  dynamicTyping?: boolean;
  /**
   * A predicate function to filter molecules. Only molecules for which this
   * function returns `true` are yielded.
   */
  filter?: (molecule: IteratorMolecule) => boolean;
}

/**
 * Asynchronously iterate over molecules from a text-decoded SDF stream.
 *
 * The stream is piped through a `MolfileStream`, and each entry it emits is
 * parsed into an {@link IteratorMolecule}. Entries rejected by
 * `options.filter` are skipped.
 * @param readStream - A `ReadableStream<string>` supplying SDF text content.
 * @param options - Iterator options.
 * @yields Individual molecule objects.
 * @example
 * ```ts
 * import { openAsBlob } from 'node:fs';
 * import { iterator } from 'sdf-parser';
 *
 * const blob = await openAsBlob('compounds.sdf');
 * const textDecoder = new TextDecoderStream();
 * for await (const molecule of iterator(blob.stream().pipeThrough(textDecoder))) {
 *   console.log(molecule.molfile);
 * }
 * ```
 */
export async function* iterator(
  readStream: ReadableStream<string>,
  options: IteratorOptions = {},
): AsyncGenerator<IteratorMolecule> {
  const { eol = '\n', mixedEOL, dynamicTyping = true, filter } = options;

  const moleculeStream = readStream.pipeThrough(
    new MolfileStream({ mixedEOL }),
  );

  // Unless normalisation is explicitly disabled, fields are split on LF and
  // the user-supplied `eol` is ignored.
  const effectiveEol = mixedEOL !== false ? '\n' : eol;

  for await (const entry of moleculeStream) {
    const molecule = parseMolecule(entry, { eol: effectiveEol, dynamicTyping });
    if (!filter || filter(molecule)) {
      yield molecule;
    }
  }
}

/** Options consumed by {@link parseMolecule}. */
interface ParseMoleculeOptions {
  /** Line separator used to split the entry into lines. */
  eol: string;
  /** When `true`, string field values are passed through `parseString`. */
  dynamicTyping: boolean;
}

/**
 * Read a field that was set on the molecule itself, ignoring anything
 * inherited from `Object.prototype` (e.g. a field labelled `constructor`).
 */
function getOwnField(molecule: IteratorMolecule, label: string): unknown {
  return Object.prototype.hasOwnProperty.call(molecule, label)
    ? molecule[label]
    : undefined;
}

/**
 * Parse a single SDF entry into a molecule object.
 *
 * The entry is split on `${eol}>`: the first part is the molfile block, and
 * every following part is one data field whose first line carries the
 * `<label>` and whose remaining lines (except the last) carry the value.
 * @param sdfPart - Text of one SDF entry.
 * @param options - Line separator and dynamic typing switch.
 * @returns The molecule with its `molfile` and one property per field label.
 */
function parseMolecule(
  sdfPart: string,
  options: ParseMoleculeOptions,
): IteratorMolecule {
  const { eol, dynamicTyping } = options;
  const [molfileBlock = '', ...fieldParts] = sdfPart.split(`${eol}>`);

  const molecule: IteratorMolecule = {
    // A first part of 5 characters or fewer is reported as an empty molfile.
    molfile: molfileBlock.length > 5 ? molfileBlock + eol : '',
  };

  for (const fieldPart of fieldParts) {
    const lines = fieldPart.split(eol);
    const header = lines[0] ?? '';
    const label = header.slice(header.indexOf('<') + 1, header.indexOf('>'));

    // Value lines sit between the header and the last line of the part.
    for (const line of lines.slice(1, -1)) {
      // Only an own, truthy value is extended; reading through the prototype
      // chain would prepend built-ins such as `Object` for odd labels.
      const current = getOwnField(molecule, label);
      molecule[label] = current ? `${current as string}${eol}${line}` : line;
    }

    // A field without value lines leaves no string to convert; handing
    // `undefined` to `parseString` would throw.
    const value = getOwnField(molecule, label);
    if (dynamicTyping && typeof value === 'string') {
      molecule[label] = parseString(value);
    }
  }

  return molecule;
}