UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications…

67 lines (66 loc) 2.36 kB
/**
 * Sentence Chunker
 *
 * Splits text by sentence boundaries for semantically meaningful chunks.
 */
import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";

/**
 * Sentence Chunker
 *
 * Groups consecutive sentences into chunks whose length (in UTF-16 code
 * units, i.e. `String.prototype.length`) does not exceed `maxSize`, except
 * when a single sentence is itself longer than `maxSize` — such a sentence is
 * emitted as its own oversized chunk rather than being cut.
 */
export class SentenceChunker extends BaseChunker {
  strategy = "sentence";

  /**
   * Default configuration for this strategy.
   *
   * NOTE(review): `overlap` is declared here but is not consumed by
   * `doChunk` in this class — confirm whether the base class applies it.
   *
   * @returns {object} The shared chunker defaults with `maxSize` and
   *   `overlap` overridden.
   */
  getDefaultConfig() {
    return {
      ...DEFAULT_CHUNKER_CONFIG,
      maxSize: 1000,
      overlap: 1, // Overlap in sentences
    };
  }

  /**
   * Split `content` into sentence-aligned chunks.
   *
   * @param {string} content - Text to chunk.
   * @param {object} config - Chunker configuration; only `maxSize` is read
   *   here (falls back to 1000 when null/undefined).
   * @returns {Promise<Array>} Chunks built via `this.createChunk(text, index,
   *   startOffset, endOffset)`, in document order. Offsets are indices into
   *   `content`, with `endOffset` exclusive.
   */
  async doChunk(content, config) {
    const maxSize = config.maxSize ?? 1000;
    // Simple sentence splitting (can be enhanced with NLP)
    const sentences = this.splitIntoSentences(content);
    const chunks = [];
    let currentChunk = "";
    // Offset of `currentChunk` within `content`. The sentences are contiguous
    // slices of `content`, so the offset is tracked by running position
    // instead of searching with indexOf (which picks the wrong occurrence
    // when the chunk text also appears earlier in the document).
    let currentStart = 0;
    let chunkIndex = 0;

    for (const sentence of sentences) {
      const wouldOverflow = currentChunk.length + sentence.length > maxSize;
      if (wouldOverflow && currentChunk.length > 0) {
        const endOffset = currentStart + currentChunk.length;
        chunks.push(this.createChunk(currentChunk, chunkIndex++, currentStart, endOffset));
        currentStart = endOffset;
        currentChunk = "";
      }
      // A sentence longer than maxSize lands here with an empty buffer and
      // becomes a chunk on its own.
      currentChunk += sentence;
    }

    // Add remaining chunk
    if (currentChunk.length > 0) {
      chunks.push(
        this.createChunk(currentChunk, chunkIndex, currentStart, currentStart + currentChunk.length),
      );
    }
    return chunks;
  }

  /**
   * Split content into sentences.
   *
   * A sentence ends at a run of `.`, `!` or `?` that is followed by a
   * whitespace character or the end of the input; that one trailing
   * whitespace character is kept with the sentence. Terminators followed by
   * anything else (e.g. the dot in "1.5" or "example.com") do not end a
   * sentence. The returned pieces are contiguous and lossless:
   * `sentences.join("") === content`.
   *
   * @param {string} content - Text to split.
   * @returns {string[]} Sentences in order; empty array for empty input.
   */
  splitIntoSentences(content) {
    // Match only the boundary and slice from a running cursor. Matching whole
    // sentences with a leading `[^.!?]*` silently skipped text before an
    // in-word terminator and rescanned quadratically on unpunctuated input.
    const boundaryPattern = /[.!?]+(?:\s|$)/g;
    const sentences = [];
    let cursor = 0;
    let match;
    while ((match = boundaryPattern.exec(content)) !== null) {
      const end = match.index + match[0].length;
      sentences.push(content.slice(cursor, end));
      cursor = end;
    }
    // Handle remaining content without sentence ending
    if (cursor < content.length) {
      sentences.push(content.slice(cursor));
    }
    return sentences;
  }
}