@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio
158 lines (157 loc) • 5.36 kB
JavaScript
/**
* Base Chunker
*
* Abstract base class for all chunker implementations.
* Provides common functionality and interface contract.
*/
import { randomUUID } from "node:crypto";
import { ChunkingError, RAGErrorCodes } from "../errors/RAGError.js";
import { withSpan } from "../../telemetry/withSpan.js";
import { tracers } from "../../telemetry/tracers.js";
/**
 * Baseline chunker settings.
 *
 * BaseChunker#getDefaultConfig returns a shallow copy of this object, and the
 * BaseChunker constructor overlays caller-supplied options on top of that copy.
 */
export const DEFAULT_CHUNKER_CONFIG = {
    // Upper bound on chunk size; BaseChunker#validateConfig rejects values <= 0.
    maxSize: 1000,
    // Amount shared between neighbouring chunks; validation requires
    // 0 <= overlap < maxSize.
    overlap: 100,
    // BaseChunker#filterChunks drops chunks whose text is shorter than this.
    minSize: 10,
    // When true, BaseChunker#createChunk attaches caller-supplied custom metadata.
    preserveMetadata: true,
};
/**
 * Base Chunker abstract class
 *
 * All chunker implementations should extend this class. The base class calls
 * `this.doChunk(content, config)` and reads `this.strategy`, neither of which
 * it defines itself, so subclasses are expected to supply them.
 */
export class BaseChunker {
    /** Effective configuration: getDefaultConfig() overlaid with constructor overrides. */
    config;
    /**
     * @param {object} [config] - Overrides merged on top of getDefaultConfig().
     * @throws {ChunkingError} When the merged configuration fails validateConfig().
     */
    constructor(config) {
        this.config = { ...this.getDefaultConfig(), ...config };
        this.validateConfig();
    }
    /**
     * Get default configuration for this chunker
     *
     * @returns {object} A fresh shallow copy of DEFAULT_CHUNKER_CONFIG.
     */
    getDefaultConfig() {
        return { ...DEFAULT_CHUNKER_CONFIG };
    }
    /**
     * Validate chunker configuration
     *
     * Checks `this.config`: maxSize (when set) must be positive, overlap (when
     * set) must be non-negative, and overlap must be smaller than maxSize.
     *
     * @throws {ChunkingError} With code CHUNKING_INVALID_CONFIG on the first violation.
     */
    validateConfig() {
        const { maxSize, overlap } = this.config;
        if (maxSize !== undefined && maxSize <= 0) {
            throw new ChunkingError("maxSize must be positive", {
                code: RAGErrorCodes.CHUNKING_INVALID_CONFIG,
                details: { maxSize },
            });
        }
        if (overlap !== undefined && overlap < 0) {
            throw new ChunkingError("overlap cannot be negative", {
                code: RAGErrorCodes.CHUNKING_INVALID_CONFIG,
                details: { overlap },
            });
        }
        if (maxSize !== undefined && overlap !== undefined && overlap >= maxSize) {
            throw new ChunkingError("overlap must be less than maxSize", {
                code: RAGErrorCodes.CHUNKING_INVALID_CONFIG,
                details: { maxSize, overlap },
            });
        }
    }
    /**
     * Chunk content into smaller pieces
     *
     * Runs inside a "neurolink.rag.chunk" span, delegates the actual splitting
     * to `this.doChunk`, then applies filterChunks to the result.
     *
     * @param {string} content - Text to split.
     * @param {object} [config] - Per-call overrides merged over `this.config`.
     * @returns {Promise<Array>} The chunks that survive filterChunks.
     * @throws {ChunkingError} CHUNKING_EMPTY_CONTENT when content is missing or
     *   whitespace-only; CHUNKING_ERROR wrapping any non-ChunkingError failure
     *   raised while chunking or filtering.
     */
    async chunk(content, config) {
        // Null/undefined content must reach the "Content is empty" check below
        // rather than fail with a TypeError while the span attributes are built.
        const isMissing = content == null;
        return withSpan({
            name: "neurolink.rag.chunk",
            tracer: tracers.rag,
            attributes: {
                "rag.chunker.strategy": this.strategy,
                "rag.chunker.content_chars": isMissing ? 0 : content.length,
                "rag.chunker.content_bytes": isMissing
                    ? 0
                    : Buffer.byteLength(content, "utf8"),
            },
        }, async (span) => {
            const effectiveConfig = { ...this.config, ...config };
            if (!content || content.trim().length === 0) {
                throw new ChunkingError("Content is empty", {
                    code: RAGErrorCodes.CHUNKING_EMPTY_CONTENT,
                    strategy: this.strategy,
                    contentLength: 0,
                });
            }
            try {
                const chunks = await this.doChunk(content, effectiveConfig);
                const result = this.filterChunks(chunks, effectiveConfig);
                span.setAttribute("rag.chunker.chunk_count", result.length);
                return result;
            }
            catch (error) {
                // Domain errors pass through untouched; anything else is wrapped.
                if (error instanceof ChunkingError) {
                    throw error;
                }
                throw new ChunkingError(`Chunking failed: ${error instanceof Error ? error.message : String(error)}`, {
                    code: RAGErrorCodes.CHUNKING_ERROR,
                    cause: error instanceof Error ? error : undefined,
                    strategy: this.strategy,
                    contentLength: content.length,
                });
            }
        }); // end withSpan
    }
    /**
     * Filter chunks based on minimum size
     *
     * @param {Array<{text: string}>} chunks - Candidate chunks.
     * @param {object} config - Configuration; `minSize` defaults to 0 when nullish.
     * @returns {Array} Chunks whose `text.length` is at least `minSize`.
     */
    filterChunks(chunks, config) {
        const minSize = config.minSize ?? 0;
        return chunks.filter((chunk) => chunk.text.length >= minSize);
    }
    /**
     * Create a chunk object
     *
     * @param {string} text - Chunk text.
     * @param {number} chunkIndex - Index stored in the chunk metadata.
     * @param {number} startPosition - Start offset stored in the metadata.
     * @param {number} endPosition - End offset stored in the metadata.
     * @param {string} [documentId="unknown"] - Source document identifier.
     * @param {object} [customMetadata] - Stored under `metadata.custom` only
     *   when `this.config.preserveMetadata` is truthy.
     * @returns {{id: string, text: string, metadata: object}} Chunk with a random UUID id.
     */
    createChunk(text, chunkIndex, startPosition, endPosition, documentId = "unknown", customMetadata) {
        const metadata = {
            documentId,
            chunkIndex,
            startPosition,
            endPosition,
            custom: this.config.preserveMetadata ? customMetadata : undefined,
        };
        return {
            id: randomUUID(),
            text,
            metadata,
        };
    }
    /**
     * Split content by size with overlap
     *
     * Slides a window of `maxSize` string positions over `content`. Each new
     * window starts `overlap` positions before the previous window's end, and
     * always at least one position after the previous start.
     *
     * @param {string} content - Text to split.
     * @param {number} maxSize - Maximum window length.
     * @param {number} overlap - Positions shared between consecutive windows;
     *   negative or non-numeric values are treated as 0.
     * @returns {Array<{text: string, start: number, end: number}>} Windows in
     *   order; `end` is exclusive.
     */
    splitBySizeWithOverlap(content, maxSize, overlap) {
        const pieces = [];
        const total = content.length;
        // A negative overlap would leave gaps between windows and a missing one
        // would poison the arithmetic with NaN, so both fall back to no overlap.
        const effectiveOverlap = overlap > 0 ? overlap : 0;
        let start = 0;
        while (start < total) {
            const end = Math.min(start + maxSize, total);
            pieces.push({ text: content.slice(start, end), start, end });
            // Done once the content is exhausted. The `end <= start` case only
            // arises for a non-positive maxSize, where no window can advance.
            if (end >= total || end <= start) {
                break;
            }
            // Step back by the overlap but always advance by at least one
            // position. With zero overlap the next window begins exactly at
            // `end`, which is valid and must not terminate the loop.
            start = Math.max(end - effectiveOverlap, start + 1);
        }
        return pieces;
    }
}