UNPKG

llm-code-format

Version:

Parsing and serialization of multiple code files in Markdown for LLMs

95 lines (94 loc) 3.65 kB
/**
 * Incremental, line-oriented parser for Markdown that contains several code
 * files, each announced by a header line and followed by a fenced code block.
 *
 * Text is fed in arbitrary chunks via `processChunk`; complete lines are
 * classified and reported through the supplied callbacks:
 *
 * - `onFileNameChange(fileName, format)` — a file header line was recognised.
 * - `onCodeLine(line)` — a line inside a fenced code block.
 * - `onNonCodeLine(line)` — (optional) any other line outside a code block.
 *
 * Callback return values are awaited, so callbacks may be async; lines are
 * delivered strictly in order.
 */
export class StreamingMarkdownParser {
  /** Text received so far that does not yet end in a newline. */
  buffer = "";

  /** True while the parser is between an opening and a closing code fence. */
  insideCodeFence = false;

  /**
   * Number of backticks in the fence that opened the current code block
   * (0 when outside a code block). Used so that a shorter fence nested inside
   * a longer one is treated as content rather than as a closing fence.
   */
  openFenceLength = 0;

  /** File name from the most recently recognised header, or null. */
  currentFileName = null;

  /** Format label of the most recently recognised header. */
  detectedFormat = "Unknown Format";

  /** Callback object passed to the constructor. */
  callbacks;

  /**
   * An array of regex patterns for detecting file headers.
   * Currently only supports Bold Format, but can be extended in the future.
   * Capture group 1 of each regex must be the raw file name.
   */
  headerPatterns = [
    // Matches: **filename.js**
    {
      regex: /^\s*\*\*([^\n*`]+?)\*\*(?:[^\n]*)\s*$/,
      format: "Bold Format",
    },
  ];

  /**
   * @param callbacks - Object with `onFileNameChange` and `onCodeLine`
   *   functions and, optionally, `onNonCodeLine`.
   */
  constructor(callbacks) {
    this.callbacks = callbacks;
  }

  /**
   * Processes an incoming chunk from the stream.
   * Chunks are buffered until full lines (ending with '\n') are available;
   * any trailing partial line stays in the buffer for the next call.
   * @param chunk - A string chunk from the stream.
   */
  async processChunk(chunk) {
    this.buffer += chunk;
    let newlineIndex;
    while ((newlineIndex = this.buffer.indexOf("\n")) !== -1) {
      const line = this.buffer.slice(0, newlineIndex);
      this.buffer = this.buffer.slice(newlineIndex + 1);
      await this.processLine(line);
    }
  }

  /**
   * Flushes any remaining content in the buffer as a final line.
   * Should be called once after the stream has ended.
   */
  async flushRemaining() {
    if (this.buffer.length === 0) {
      return;
    }
    // Clear the buffer before awaiting so that nothing appended while the
    // callback is running gets wiped afterwards.
    const remainder = this.buffer;
    this.buffer = "";
    await this.processLine(remainder);
  }

  /**
   * Processes a single line.
   *
   * A line whose trimmed text starts with "```" is a fence marker. Outside a
   * code block it opens one; inside a code block it closes it, unless it has
   * fewer backticks than the opening fence, in which case it is ordinary
   * content (this is what allows a four-backtick block to contain
   * three-backtick fences). Fence markers themselves are never emitted.
   *
   * Inside a code block every line goes to `onCodeLine`. Outside, the line is
   * checked against the header patterns: a match triggers `onFileNameChange`,
   * anything else goes to `onNonCodeLine` when that callback is provided.
   *
   * @param line - A single line of text, without its trailing newline.
   */
  async processLine(line) {
    const trimmed = line.trim();

    if (trimmed.startsWith("```")) {
      // Non-null: the startsWith check guarantees at least three backticks.
      const fenceLength = /^`+/.exec(trimmed)[0].length;

      if (!this.insideCodeFence) {
        this.insideCodeFence = true;
        this.openFenceLength = fenceLength;
        return;
      }

      if (fenceLength >= this.openFenceLength) {
        this.insideCodeFence = false;
        this.openFenceLength = 0;
        return;
      }
      // Shorter than the opening fence: fall through and emit it as code.
    }

    if (this.insideCodeFence) {
      await this.callbacks.onCodeLine(line);
      return;
    }

    const header = this.matchHeader(line);
    if (header !== null) {
      this.currentFileName = header.fileName;
      this.detectedFormat = header.format;
      await this.callbacks.onFileNameChange(header.fileName, header.format);
      return;
    }

    if (this.callbacks.onNonCodeLine) {
      await this.callbacks.onNonCodeLine(line);
    }
  }

  /**
   * Tests a line against the header patterns in order and returns the first
   * usable match.
   *
   * @param line - A line found outside any code block.
   * @returns `{ fileName, format }` for the first pattern that yields a
   *   non-empty file name, or `null` if the line is not a file header.
   */
  matchHeader(line) {
    for (const { regex, format } of this.headerPatterns) {
      const match = regex.exec(line);
      if (!match) {
        continue;
      }

      let fileName = match[1].trim();
      if (format === "Bold Format") {
        // Drop a parenthesised note and anything after it,
        // e.g. "app.js (updated)" -> "app.js".
        fileName = fileName.replace(/\s*\([^)]*\).*$/, "").trim();
      }

      // A bold line such as "**(note)**" leaves nothing behind; it is not a
      // file header, so do not report an empty file name.
      if (fileName === "") {
        continue;
      }

      return { fileName, format };
    }
    return null;
  }
}