UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications…

158 lines (157 loc) 5.36 kB
/**
 * Base Chunker
 *
 * Abstract base class for all chunker implementations.
 * Provides common functionality and interface contract.
 */
import { randomUUID } from "node:crypto";
import { ChunkingError, RAGErrorCodes } from "../errors/RAGError.js";
import { withSpan } from "../../telemetry/withSpan.js";
import { tracers } from "../../telemetry/tracers.js";

/**
 * Default chunker configuration.
 *
 * - `maxSize`: upper bound on a chunk's size (must be > 0).
 * - `overlap`: amount of content shared between consecutive chunks
 *   (must be >= 0 and < `maxSize`).
 * - `minSize`: chunks whose `text.length` is below this are dropped by
 *   `filterChunks`.
 * - `preserveMetadata`: when false, `createChunk` discards custom metadata.
 *
 * Within this base class sizes are compared against JavaScript string
 * lengths (UTF-16 code units); subclasses may interpret them differently.
 */
export const DEFAULT_CHUNKER_CONFIG = {
    maxSize: 1000,
    overlap: 100,
    minSize: 10,
    preserveMetadata: true,
};

/**
 * Base Chunker abstract class
 *
 * All chunker implementations should extend this class. This class reads
 * `this.strategy` and calls `this.doChunk(content, config)` but defines
 * neither; both are expected to be supplied by the concrete subclass.
 */
export class BaseChunker {
    /** Effective instance configuration (defaults merged with constructor overrides). */
    config;

    /**
     * @param {object} [config] - Overrides merged on top of `getDefaultConfig()`.
     * @throws {ChunkingError} If the merged configuration is invalid.
     */
    constructor(config) {
        this.config = { ...this.getDefaultConfig(), ...config };
        this.validateConfig();
    }

    /**
     * Get default configuration for this chunker.
     *
     * @returns {object} A fresh shallow copy of `DEFAULT_CHUNKER_CONFIG`, so
     *   callers may mutate the result without affecting the shared defaults.
     */
    getDefaultConfig() {
        return { ...DEFAULT_CHUNKER_CONFIG };
    }

    /**
     * Validate chunker configuration (`this.config`).
     *
     * @throws {ChunkingError} With code `CHUNKING_INVALID_CONFIG` when
     *   `maxSize <= 0`, `overlap < 0`, or `overlap >= maxSize`.
     */
    validateConfig() {
        const { maxSize, overlap } = this.config;
        if (maxSize !== undefined && maxSize <= 0) {
            throw new ChunkingError("maxSize must be positive", {
                code: RAGErrorCodes.CHUNKING_INVALID_CONFIG,
                details: { maxSize },
            });
        }
        if (overlap !== undefined && overlap < 0) {
            throw new ChunkingError("overlap cannot be negative", {
                code: RAGErrorCodes.CHUNKING_INVALID_CONFIG,
                details: { overlap },
            });
        }
        if (maxSize !== undefined && overlap !== undefined && overlap >= maxSize) {
            throw new ChunkingError("overlap must be less than maxSize", {
                code: RAGErrorCodes.CHUNKING_INVALID_CONFIG,
                details: { maxSize, overlap },
            });
        }
    }

    /**
     * Chunk content into smaller pieces.
     *
     * Runs the subclass's `doChunk` inside a `neurolink.rag.chunk` telemetry
     * span, then drops chunks smaller than `minSize`.
     *
     * @param {string} content - Text to chunk.
     * @param {object} [config] - Per-call overrides merged over `this.config`.
     * @returns {Promise<Array>} The chunks that survived `filterChunks`.
     * @throws {ChunkingError} `CHUNKING_EMPTY_CONTENT` when content is
     *   null/undefined/empty/whitespace-only; `CHUNKING_ERROR` wrapping any
     *   non-ChunkingError thrown while chunking.
     */
    async chunk(content, config) {
        // Compute span attributes defensively: a null/undefined `content` must
        // reach the empty-content guard below instead of raising a TypeError
        // here while the attributes are being built.
        const isText = typeof content === "string";
        return withSpan({
            name: "neurolink.rag.chunk",
            tracer: tracers.rag,
            attributes: {
                "rag.chunker.strategy": this.strategy,
                "rag.chunker.content_chars": isText ? content.length : (content?.length ?? 0),
                "rag.chunker.content_bytes": isText ? Buffer.byteLength(content, "utf8") : 0,
            },
        }, async (span) => {
            const effectiveConfig = { ...this.config, ...config };
            if (!content || content.trim().length === 0) {
                throw new ChunkingError("Content is empty", {
                    code: RAGErrorCodes.CHUNKING_EMPTY_CONTENT,
                    strategy: this.strategy,
                    contentLength: 0,
                });
            }
            try {
                const chunks = await this.doChunk(content, effectiveConfig);
                const result = this.filterChunks(chunks, effectiveConfig);
                span.setAttribute("rag.chunker.chunk_count", result.length);
                return result;
            }
            catch (error) {
                // Domain errors pass through untouched; anything else is wrapped
                // so callers only ever see ChunkingError from this path.
                if (error instanceof ChunkingError) {
                    throw error;
                }
                throw new ChunkingError(`Chunking failed: ${error instanceof Error ? error.message : String(error)}`, {
                    code: RAGErrorCodes.CHUNKING_ERROR,
                    cause: error instanceof Error ? error : undefined,
                    strategy: this.strategy,
                    contentLength: content.length,
                });
            }
        }); // end withSpan
    }

    /**
     * Filter chunks based on minimum size.
     *
     * @param {Array<{text: string}>} chunks - Candidate chunks.
     * @param {object} config - Effective config; `minSize` defaults to 0.
     * @returns {Array} Chunks whose `text.length` is at least `minSize`.
     */
    filterChunks(chunks, config) {
        const minSize = config.minSize ?? 0;
        return chunks.filter((chunk) => chunk.text.length >= minSize);
    }

    /**
     * Create a chunk object.
     *
     * @param {string} text - Chunk text.
     * @param {number} chunkIndex - Position of the chunk in its sequence.
     * @param {number} startPosition - Start offset in the source content.
     * @param {number} endPosition - End offset in the source content.
     * @param {string} [documentId="unknown"] - Owning document identifier.
     * @param {object} [customMetadata] - Extra metadata; kept only when
     *   `this.config.preserveMetadata` is truthy.
     * @returns {{id: string, text: string, metadata: object}} Chunk with a
     *   freshly generated UUID.
     */
    createChunk(text, chunkIndex, startPosition, endPosition, documentId = "unknown", customMetadata) {
        const metadata = {
            documentId,
            chunkIndex,
            startPosition,
            endPosition,
            custom: this.config.preserveMetadata ? customMetadata : undefined,
        };
        return {
            id: randomUUID(),
            text,
            metadata,
        };
    }

    /**
     * Split content by size with overlap.
     *
     * Produces consecutive windows of at most `maxSize` characters, where each
     * window starts `maxSize - overlap` characters after the previous one.
     * The whole of `content` is always covered.
     *
     * @param {string} content - Text to split.
     * @param {number} maxSize - Maximum window length; must be > 0.
     * @param {number} overlap - Characters shared between neighbouring
     *   windows. Negative/NaN/undefined values are treated as 0; values
     *   >= `maxSize` degrade to a one-character stride so the loop still
     *   makes progress.
     * @returns {Array<{text: string, start: number, end: number}>} Windows in
     *   order, with `start` inclusive and `end` exclusive offsets.
     * @throws {ChunkingError} `CHUNKING_INVALID_CONFIG` when `maxSize` is not
     *   a positive number.
     */
    splitBySizeWithOverlap(content, maxSize, overlap) {
        // `!(x > 0)` also rejects NaN and undefined.
        if (!(maxSize > 0)) {
            throw new ChunkingError("maxSize must be positive", {
                code: RAGErrorCodes.CHUNKING_INVALID_CONFIG,
                details: { maxSize },
            });
        }
        const effectiveOverlap = overlap > 0 ? overlap : 0;
        // Fixed stride between window starts; at least 1 so the loop always
        // terminates. A zero overlap yields a stride of exactly `maxSize`,
        // i.e. back-to-back windows covering the whole content.
        const step = Math.max(1, maxSize - effectiveOverlap);
        const result = [];
        for (let start = 0; start < content.length; start += step) {
            const end = Math.min(start + maxSize, content.length);
            result.push({
                text: content.slice(start, end),
                start,
                end,
            });
            // The window reached the end of content: any further window would
            // only repeat the tail, so stop here.
            if (end >= content.length) {
                break;
            }
        }
        return result;
    }
}