@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.
67 lines (66 loc) • 2.36 kB
JavaScript
/**
* Sentence Chunker
*
* Splits text by sentence boundaries for semantically meaningful chunks.
*/
import { BaseChunker, DEFAULT_CHUNKER_CONFIG } from "./BaseChunker.js";
/**
 * Sentence Chunker
 *
 * Splits text at sentence boundaries and packs consecutive sentences into
 * chunks of at most `maxSize` characters. A single sentence that is longer
 * than `maxSize` is not split further; it is emitted as its own chunk.
 */
export class SentenceChunker extends BaseChunker {
    strategy = "sentence";
    /**
     * Default configuration for this strategy.
     *
     * NOTE(review): `overlap` is expressed in sentences here, but `doChunk`
     * below does not read it - confirm whether the base class applies it.
     *
     * @returns {object} The shared chunker defaults with `maxSize` and
     *   `overlap` overridden.
     */
    getDefaultConfig() {
        return {
            ...DEFAULT_CHUNKER_CONFIG,
            maxSize: 1000,
            overlap: 1, // Overlap in sentences
        };
    }
    /**
     * Group sentences into chunks no longer than `config.maxSize` characters.
     *
     * Offsets are tracked with a running cursor instead of searching the
     * content for the chunk text: the sentences returned by
     * `splitIntoSentences` are contiguous slices of `content`, so each chunk
     * starts exactly where the previous one ended. Searching could report the
     * wrong position whenever the same text occurs more than once.
     *
     * @param {string} content - Text to split.
     * @param {object} config - Chunker configuration; only `maxSize` is read
     *   here (falls back to 1000 when nullish).
     * @returns {Promise<Array>} Chunks built by the inherited `createChunk`,
     *   called as `(text, index, startOffset, endOffset)`.
     */
    async doChunk(content, config) {
        const maxSize = config.maxSize ?? 1000;
        const sentences = this.splitIntoSentences(content);
        const chunks = [];
        let currentChunk = "";
        // Offset in `content` of the first character of `currentChunk`.
        let chunkStart = 0;
        let chunkIndex = 0;
        for (const sentence of sentences) {
            if (currentChunk.length + sentence.length <= maxSize) {
                currentChunk += sentence;
                continue;
            }
            // The sentence does not fit: flush what has been collected so far.
            if (currentChunk.length > 0) {
                const chunkEnd = chunkStart + currentChunk.length;
                chunks.push(this.createChunk(currentChunk, chunkIndex++, chunkStart, chunkEnd));
                chunkStart = chunkEnd;
            }
            // An oversized sentence still starts (and will fill) its own chunk.
            currentChunk = sentence;
        }
        // Add remaining chunk
        if (currentChunk.length > 0) {
            chunks.push(this.createChunk(currentChunk, chunkIndex, chunkStart, chunkStart + currentChunk.length));
        }
        return chunks;
    }
    /**
     * Split content into sentences.
     *
     * A sentence ends at a run of `.`, `!` or `?` that is followed by
     * whitespace or the end of the input. Punctuation followed directly by
     * another character (as in "1.5" or "example.com") does not end a
     * sentence. Abbreviations such as "Dr. " are NOT recognised and still
     * split.
     *
     * The returned pieces always concatenate back to `content`: text the
     * pattern skips over is kept at the front of the next sentence, and any
     * trailing text without terminal punctuation becomes the last entry.
     *
     * @param {string} content - Text to split.
     * @returns {string[]} Contiguous slices of `content`, in order.
     */
    splitIntoSentences(content) {
        const sentencePattern = /[^.!?]*[.!?]+(?:\s|$)/g;
        const sentences = [];
        // End position of the last sentence emitted so far.
        let cursor = 0;
        for (const match of content.matchAll(sentencePattern)) {
            const end = match.index + match[0].length;
            // Slice from the cursor, not from match.index, so that text the
            // regex had to skip to find a match is not lost.
            sentences.push(content.slice(cursor, end));
            cursor = end;
        }
        // Handle remaining content without sentence ending
        if (cursor < content.length) {
            sentences.push(content.slice(cursor));
        }
        return sentences;
    }
}