@stackmemoryai/stackmemory

Project-scoped memory for AI coding tools. Durable context across sessions with MCP integration, frames, smart retrieval, Claude Code skills, and automatic hooks.

streaming-jsonl-parser.js: 196 lines (195 loc), 5.49 kB
import { fileURLToPath as __fileURLToPath } from 'url';
import { dirname as __pathDirname } from 'path';
const __filename = __fileURLToPath(import.meta.url);
const __dirname = __pathDirname(__filename);
import { createReadStream } from "fs";
import { createInterface } from "readline";
import { Transform, pipeline } from "stream";
import { promisify } from "util";
import { logger } from "../monitoring/logger.js";

const pipelineAsync = promisify(pipeline);

class StreamingJSONLParser {
  DEFAULT_BATCH_SIZE = 100;
  DEFAULT_MAX_LINE_LENGTH = 1024 * 1024; // 1MB per line

  /**
   * Stream-parse a JSONL file with batching and backpressure handling
   */
  async *parseStream(filePath, options = {}) {
    const {
      batchSize = this.DEFAULT_BATCH_SIZE,
      maxLineLength = this.DEFAULT_MAX_LINE_LENGTH,
      filter,
      transform,
      onProgress
    } = options;
    const stream = createReadStream(filePath, {
      encoding: "utf8",
      highWaterMark: 64 * 1024 // 64KB chunks
    });
    const rl = createInterface({
      input: stream,
      crlfDelay: Infinity,
      historySize: 0 // Disable history for memory efficiency
    });
    let batch = [];
    let lineCount = 0;
    let processedCount = 0;
    let errorCount = 0;
    try {
      for await (const line of rl) {
        lineCount++;
        if (line.length > maxLineLength) {
          logger.warn("Skipping oversized line", {
            lineNumber: lineCount,
            length: line.length,
            maxLength: maxLineLength
          });
          errorCount++;
          continue;
        }
        if (!line.trim()) continue;
        try {
          let obj = JSON.parse(line);
          if (filter && !filter(obj)) continue;
          if (transform) obj = transform(obj);
          batch.push(obj);
          processedCount++;
          if (batch.length >= batchSize) {
            yield batch;
            batch = [];
            onProgress?.(processedCount);
          }
        } catch (parseError) {
          errorCount++;
          logger.debug("Failed to parse JSONL line", {
            lineNumber: lineCount,
            error: parseError,
            preview: line.substring(0, 100)
          });
        }
      }
      if (batch.length > 0) {
        yield batch;
        onProgress?.(processedCount);
      }
    } finally {
      rl.close();
      stream.destroy();
      logger.debug("JSONL parsing complete", {
        filePath,
        totalLines: lineCount,
        processed: processedCount,
        errors: errorCount
      });
    }
  }

  /**
   * Parse entire file into memory (use for smaller files)
   */
  async parseAll(filePath, options = {}) {
    const results = [];
    for await (const batch of this.parseStream(filePath, {
      ...options,
      batchSize: Number.MAX_SAFE_INTEGER
    })) {
      results.push(...batch);
    }
    return results;
  }

  /**
   * Process JSONL file with a custom processor function
   */
  async process(filePath, processor, options = {}) {
    const results = [];
    for await (const batch of this.parseStream(filePath, options)) {
      const result = await processor(batch);
      results.push(result);
    }
    return results;
  }

  /**
   * Create a transform stream for JSONL parsing
   */
  createTransformStream(options = {}) {
    const {
      filter,
      transform,
      maxLineLength = this.DEFAULT_MAX_LINE_LENGTH
    } = options;
    let buffer = "";
    let lineCount = 0;
    return new Transform({
      objectMode: true,
      transform(chunk, encoding, callback) {
        buffer += chunk.toString();
        const lines = buffer.split("\n");
        buffer = lines.pop() || "";
        for (const line of lines) {
          lineCount++;
          if (!line.trim()) continue;
          if (line.length > maxLineLength) {
            logger.warn("Skipping oversized line in transform", { lineCount });
            continue;
          }
          try {
            let obj = JSON.parse(line);
            if (filter && !filter(obj)) continue;
            if (transform) obj = transform(obj);
            this.push(obj);
          } catch (error) {
            logger.debug("Transform parse error", { lineCount, error });
          }
        }
        callback();
      },
      flush(callback) {
        if (buffer.trim()) {
          try {
            let obj = JSON.parse(buffer);
            if (!filter || filter(obj)) {
              if (transform) obj = transform(obj);
              this.push(obj);
            }
          } catch (error) {
            logger.debug("Flush parse error", { error });
          }
        }
        callback();
      }
    });
  }

  /**
   * Count lines in JSONL file without parsing
   */
  async countLines(filePath) {
    const stream = createReadStream(filePath, { encoding: "utf8" });
    const rl = createInterface({ input: stream, historySize: 0 });
    let count = 0;
    for await (const _ of rl) {
      count++;
    }
    return count;
  }

  /**
   * Sample random lines from JSONL file
   */
  async *sampleLines(filePath, sampleRate, options = {}) {
    if (sampleRate <= 0 || sampleRate > 1) {
      throw new Error("Sample rate must be between 0 and 1");
    }
    for await (const batch of this.parseStream(filePath, options)) {
      for (const item of batch) {
        if (Math.random() < sampleRate) {
          yield item;
        }
      }
    }
  }
}

export { StreamingJSONLParser };
//# sourceMappingURL=streaming-jsonl-parser.js.map