mongodocs-mcp
Version:
Lightning-fast semantic search for MongoDB documentation via Model Context Protocol. 10,000+ documents, <500ms search.
88 lines • 2.27 kB
TypeScript
/**
* Smart Document Chunker - Adaptive chunking for any content type
*
* Merges the best of:
* - DocumentChunker (basic chunking)
* - EnhancedDocumentChunker (quality scoring)
*
* Features:
* - Automatic content type detection
* - Optimal chunk sizing for voyage-3 vs voyage-code-3
* - Semantic boundary preservation
* - Code block integrity
*/
import { Document, ChunkedDocument } from '../types/index.js';
/**
* Tuning options accepted by SmartChunker.chunkDocuments.
*
* Every field is optional. The default values are not visible in this
* declaration file; presumably they come from SmartChunker's private
* DEFAULT_OPTIONS - confirm against the implementation.
*/
export interface ChunkOptions {
/** Desired chunk size. NOTE(review): unit (tokens vs. characters) is not shown here - confirm. */
targetSize?: number;
/** Upper bound on chunk size; presumably in the same unit as targetSize - confirm. */
maxSize?: number;
/** Lower bound on chunk size; presumably in the same unit as targetSize - confirm. */
minSize?: number;
/** Presumably keeps code blocks intact rather than splitting them - confirm. */
preserveCode?: boolean;
/** Presumably keeps example sections intact - confirm what counts as an "example". */
preserveExamples?: boolean;
/** Presumably prefers splitting at semantic boundaries (headers, sentences) - confirm. */
semanticBoundaries?: boolean;
/** Amount of overlap between consecutive chunks. NOTE(review): unit not shown here - confirm. */
overlap?: number;
/** Selects the chunking strategy; one of the three listed literals. */
chunkingStrategy?: 'fixed' | 'semantic' | 'proposition';
/** Numeric threshold; presumably only consulted for the 'semantic' strategy - confirm range and meaning. */
semanticThreshold?: number;
/** How breakpoints are determined; one of the four listed literals. Presumably paired with semanticThreshold - confirm. */
breakpointType?: 'percentile' | 'standard_deviation' | 'interquartile' | 'gradient';
}
/**
* Splits documents into chunks.
*
* Public surface: chunkDocuments (async, batch entry point) and hashContent.
* All other members are private; their signatures are erased in this
* declaration file, so the notes on them restate intent only and should be
* verified against the implementation.
*/
export declare class SmartChunker {
/** NOTE(review): presumably a tokenizer used to measure chunk size - type is erased here; confirm. */
private encoder;
/** NOTE(review): presumably scores chunk quality - type is erased here; confirm. */
private qualityScorer;
/** Baseline options; presumably merged with caller-supplied ChunkOptions - confirm. */
private readonly DEFAULT_OPTIONS;
/**
* Chunk multiple documents intelligently
*
* @param documents - Documents to split into chunks.
* @param options - Optional tuning; see ChunkOptions.
* @returns A promise resolving to the resulting chunks as a single array.
*/
chunkDocuments(documents: Document[], options?: ChunkOptions): Promise<ChunkedDocument[]>;
/**
* Adapt chunking options based on content type
*
* Signature not visible in this declaration.
*/
private adaptOptionsForContent;
/**
* Chunk a single document with quality awareness
*
* Presumably the per-document step behind chunkDocuments - confirm.
*/
private chunkDocument;
/**
* Parse document into logical sections
*
* Signature not visible in this declaration.
*/
private parseDocumentStructure;
/**
* Chunk a section intelligently
*
* Signature not visible in this declaration.
*/
private chunkSection;
/**
* Split content by sentences
*
* NOTE(review): how this differs from splitIntoSentences is not visible here - confirm.
*/
private splitBySentences;
/**
* Extract code blocks from content
*
* Signature not visible in this declaration.
*/
private extractCodeBlocks;
/**
* Split large code block carefully
*
* NOTE(review): the size limit that triggers a split is not visible here - confirm.
*/
private splitLargeCodeBlock;
/**
* Get overlap text from previous chunk
*
* Presumably governed by ChunkOptions.overlap - confirm.
*/
private getOverlapText;
/**
* Create a chunk with metadata
*
* NOTE(review): which metadata fields are populated is not visible here - confirm.
*/
private createChunk;
/**
* Detect if content contains code
*
* Signature not visible in this declaration.
*/
private detectCode;
/**
* Detect header in markdown or RST
*
* Signature not visible in this declaration.
*/
private detectHeader;
/**
* Split text into sentences
*
* NOTE(review): how this differs from splitBySentences is not visible here - confirm.
*/
private splitIntoSentences;
/**
* Generate content hash
*
* @param content - Text to hash.
* @returns The hash as a string. NOTE(review): algorithm and output encoding are not visible here - confirm.
*/
hashContent(content: string): string;
}
//# sourceMappingURL=smart-chunker.d.ts.map