UNPKG

mongodocs-mcp

Version:

Lightning-fast semantic search for MongoDB documentation via Model Context Protocol. 10,000+ documents, <500ms search.

88 lines 2.27 kB
/**
 * Smart Document Chunker - adaptive chunking for any content type.
 *
 * Combines the best parts of two earlier chunkers:
 * - DocumentChunker (basic chunking)
 * - EnhancedDocumentChunker (quality scoring)
 *
 * Features:
 * - Automatic content type detection
 * - Optimal chunk sizing for voyage-3 vs voyage-code-3
 * - Semantic boundary preservation
 * - Code block integrity
 */
import { Document, ChunkedDocument } from '../types/index.js';

/**
 * Options accepted by `SmartChunker.chunkDocuments`.
 *
 * Every field is optional. Default values and the unit of the size fields
 * are not visible in this declaration file (presumably they come from
 * `SmartChunker.DEFAULT_OPTIONS` - confirm against the implementation).
 */
export interface ChunkOptions {
  /** Desired chunk size (unit not shown here - TODO confirm). */
  targetSize?: number;
  /** Upper size bound for a chunk (same unit as `targetSize`, presumably). */
  maxSize?: number;
  /** Lower size bound for a chunk (same unit as `targetSize`, presumably). */
  minSize?: number;
  /** Flag related to keeping code intact; exact effect not shown here. */
  preserveCode?: boolean;
  /** Flag related to keeping examples intact; exact effect not shown here. */
  preserveExamples?: boolean;
  /** Flag related to splitting on semantic boundaries; exact effect not shown here. */
  semanticBoundaries?: boolean;
  /** Amount of overlap between chunks (unit not shown here - TODO confirm). */
  overlap?: number;
  /** Name of the chunking strategy; restricted to the three literals below. */
  chunkingStrategy?: 'fixed' | 'semantic' | 'proposition';
  /** Numeric threshold; presumably used by the 'semantic' strategy - confirm. */
  semanticThreshold?: number;
  /** Name of the breakpoint method; restricted to the four literals below. */
  breakpointType?:
    | 'percentile'
    | 'standard_deviation'
    | 'interquartile'
    | 'gradient';
}

/**
 * Type declaration for the smart chunker.
 *
 * The public surface is `chunkDocuments` and `hashContent`. All other members
 * are private and therefore appear in this declaration without type information.
 */
export declare class SmartChunker {
  private encoder;
  private qualityScorer;
  private readonly DEFAULT_OPTIONS;

  /**
   * Intelligently chunk a batch of documents.
   *
   * @param documents - The documents to chunk.
   * @param options - Optional chunking settings; may be omitted entirely.
   * @returns A promise that resolves to the resulting chunked documents.
   */
  chunkDocuments(
    documents: Document[],
    options?: ChunkOptions
  ): Promise<ChunkedDocument[]>;

  /** Adjust the chunking options according to the content type. */
  private adaptOptionsForContent;

  /** Chunk one document, taking quality into account. */
  private chunkDocument;

  /** Break a document down into its logical sections. */
  private parseDocumentStructure;

  /** Intelligently chunk a single section. */
  private chunkSection;

  /** Split content along sentence boundaries. */
  private splitBySentences;

  /** Pull the code blocks out of the content. */
  private extractCodeBlocks;

  /** Carefully split a code block that is too large. */
  private splitLargeCodeBlock;

  /** Obtain the overlap text taken from the previous chunk. */
  private getOverlapText;

  /** Build a chunk together with its metadata. */
  private createChunk;

  /** Determine whether the content contains code. */
  private detectCode;

  /** Detect a header written in markdown or RST. */
  private detectHeader;

  /** Break text up into sentences. */
  private splitIntoSentences;

  /**
   * Generate a hash of the given content.
   *
   * @param content - The text to hash.
   * @returns The hash as a string (algorithm and encoding are not shown in
   *   this declaration file).
   */
  hashContent(content: string): string;
}
//# sourceMappingURL=smart-chunker.d.ts.map