// sdf-parser
// Version:
// 87 lines • 3.33 kB
// JavaScript
import { normaliseChunk } from "./normaliseChunk.js";
// Number of buffered characters that must accumulate before the stream
// inspects them for `\r` and decides whether to normalise line endings.
const SNIFF_LENGTH = 10_000;
/**
* A `TransformStream` that splits an incoming SDF text stream on the `$$$$`
* record delimiter and emits individual molfile strings.
*
* Handles CRLF, LF, and mixed line endings. By default (`mixedEOL: undefined`)
* the stream sniffs the first 10 000 characters to detect `\r` and activates
* normalisation only when needed — zero overhead for pure-LF files.
*
* Entries of 40 characters or fewer are discarded.
* @example
* ```ts
* const stream = readStream.pipeThrough(new MolfileStream());
* for await (const molfile of stream) {
* console.log(molfile);
* }
* ```
*/
export class MolfileStream extends TransformStream {
  /**
   * @param {object} [options]
   * @param {boolean} [options.mixedEOL] - `true` passes every chunk through
   *   `normaliseChunk`, `false` never does, and `undefined` (the default)
   *   decides once from whether the sniffed prefix contains a `\r`.
   */
  constructor({ mixedEOL } = {}) {
    // Records must be strictly longer than this to be emitted.
    const minRecordLength = 40;
    // Text after the last complete `$$$$` line, carried over to the next chunk.
    let splitBuffer = '';
    // Passed to every `normaliseChunk` call; `pendingCR` is read back in flush.
    // Presumably set when a chunk ends in a `\r` whose `\n` may still arrive —
    // confirm against normaliseChunk.
    const crState = { pendingCR: false };
    // auto-detection state
    let sniffBuffer = '';
    let decided = mixedEOL !== undefined;
    let normalise = mixedEOL === true;

    /**
     * Index at which the record preceding a `$$$$` delimiter ends: the
     * delimiter position minus the line break directly in front of it.
     * The line break is measured on the record's own last line, so a record
     * ending in `\n` followed by `$$$$\r\n` (or the reverse) is cut correctly.
     * Never looks further back than `start`.
     */
    function endOfRecord(text, start, delimiterIndex) {
      let end = delimiterIndex;
      if (end > start && text[end - 1] === '\n') {
        end -= 1;
      }
      if (end > start && text[end - 1] === '\r') {
        end -= 1;
      }
      return end;
    }

    /**
     * Appends `text` to the carried-over buffer, enqueues every record that is
     * followed by a complete (newline-terminated) `$$$$` line, and keeps the
     * remainder for the next call.
     * NOTE: `$$$$` is matched anywhere in the text, not only at line start.
     */
    function splitRecords(text, controller) {
      const combined = splitBuffer + text;
      splitBuffer = '';
      let begin = 0;
      let index = 0;
      while ((index = combined.indexOf('$$$$', index)) !== -1) {
        const endOfDelimiter = combined.indexOf('\n', index);
        if (endOfDelimiter === -1) {
          // The delimiter line is not complete yet; wait for more input.
          break;
        }
        const recordEnd = endOfRecord(combined, begin, index);
        if (recordEnd - begin > minRecordLength) {
          controller.enqueue(combined.slice(begin, recordEnd));
        }
        index = endOfDelimiter + 1;
        begin = index;
      }
      if (begin < combined.length) {
        splitBuffer = combined.slice(begin);
      }
    }

    /**
     * Settles the auto-detection from whatever has been sniffed so far and
     * returns the sniffed text, normalised when a `\r` was seen.
     */
    function resolveSniffed() {
      decided = true;
      normalise = sniffBuffer.includes('\r');
      const text = normalise
        ? normaliseChunk(sniffBuffer, crState)
        : sniffBuffer;
      sniffBuffer = '';
      return text;
    }

    super({
      transform(chunk, controller) {
        if (decided) {
          splitRecords(
            normalise ? normaliseChunk(chunk, crState) : chunk,
            controller,
          );
          return;
        }
        sniffBuffer += chunk;
        if (sniffBuffer.length < SNIFF_LENGTH) {
          return;
        }
        splitRecords(resolveSniffed(), controller);
      },
      flush(controller) {
        if (!decided) {
          // Stream ended before SNIFF_LENGTH chars were seen.
          splitRecords(resolveSniffed(), controller);
        }
        if (normalise && crState.pendingCR) {
          // Put back the CR that crState still reports as pending.
          splitBuffer += '\r';
          crState.pendingCR = false;
        }
        // A `$$$$` still in the buffer is a final delimiter line that never
        // got its newline; drop it so it is not emitted as part of the record.
        const danglingDelimiter = splitBuffer.indexOf('$$$$');
        const tail =
          danglingDelimiter === -1
            ? splitBuffer
            : splitBuffer.slice(0, endOfRecord(splitBuffer, 0, danglingDelimiter));
        splitBuffer = '';
        if (tail.length > minRecordLength) {
          controller.enqueue(tail);
        }
      },
    });
  }
}
//# sourceMappingURL=MolfileStream.js.map