@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications…

github.com/juspay/neurolink

juspay/neurolink

1,296 lines (1,295 loc) • 37.4 kB

TypeScript

/**
 * RAG Type Definitions
 *
 * Canonical type file for all RAG (Retrieval-Augmented Generation) interfaces.
 * All exported interfaces from src/lib/rag/ are collected here as type aliases.
 */

/** Formats in which citations can be rendered. */
export type CitationFormat =
  | "inline"
  | "footnote"
  | "numbered"
  | "none";

/** Contract that every chunking strategy implements. */
export type Chunker = {
  /** Name of the strategy, used for identification. */
  readonly strategy: ChunkingStrategy;
  /**
   * Splits text into chunks.
   *
   * @param text - The text to split
   * @param config - Strategy-specific configuration
   * @returns The chunks produced from the text
   */
  chunk(text: string, config?: BaseChunkerConfig): Promise<Chunk[]>;
};

/** Options controlling how retrieved chunks are assembled into context. */
export type ContextAssemblyOptions = {
  /** Upper bound on the number of characters in the assembled context. */
  maxChars?: number;
  /** Upper bound on tokens (approximate, 4 chars/token). */
  maxTokens?: number;
  /** Which citation format to use. */
  citationFormat?: CitationFormat;
  /** Text placed between chunks. */
  separator?: string;
  /** Whether chunk metadata is included in the context. */
  includeMetadata?: boolean;
  /** Whether overlapping content is deduplicated. */
  deduplicate?: boolean;
  /** Similarity threshold used for deduplication (0-1). */
  dedupeThreshold?: number;
  /** Whether chunks are ordered by relevance score. */
  orderByRelevance?: boolean;
  /** Whether section headers are included. */
  includeSectionHeaders?: boolean;
  /** Header template; supports the {index}, {source} and {score} placeholders. */
  headerTemplate?: string;
};

/** The assembled context together with its size bookkeeping. */
export type ContextWindow = {
  /** The assembled context text. */
  text: string;
  /** How many chunks were included. */
  chunkCount: number;
  /** Total number of characters. */
  charCount: number;
  /** Estimated number of tokens. */
  tokenCount: number;
  /** Chunks that were truncated/excluded. */
  truncatedChunks: number;
  /** Citation map (id -> citation text). */
  citations: Map<string, string>;
};

/** Metadata extractor types that are supported. */
export type MetadataExtractorType =
  | "llm"
  | "title"
  | "summary"
  | "keywords"
  | "questions"
  | "custom"
  | "composite";
/** Contract that every metadata extractor implements. */
export type MetadataExtractor = {
  /** Identifier of the extractor type. */
  readonly type: MetadataExtractorType;
  /**
   * Extracts metadata from chunks.
   *
   * @param chunks - Chunks to extract metadata from
   * @param params - Extraction parameters
   * @returns The extraction results
   */
  extract(chunks: Chunk[], params?: ExtractParams): Promise<ExtractionResult[]>;
};

/** Configuration of a metadata extractor. */
export type MetadataExtractorConfig = {
  /** Which extractor type to use. */
  type: MetadataExtractorType;
  /** Language model provider. */
  provider?: string;
  /** Model name used for LLM-based extraction. */
  modelName?: string;
  /** Custom prompt template. */
  promptTemplate?: string;
  /** Token limit for the LLM response. */
  maxTokens?: number;
  /** Temperature used for LLM generation. */
  temperature?: number;
};

/** Descriptive metadata about an extractor, for discovery and documentation. */
export type MetadataExtractorMetadata = {
  /** Human-readable description. */
  description: string;
  /** Default configuration. */
  defaultConfig: Partial<MetadataExtractorConfig>;
  /** Configuration options that are supported. */
  supportedOptions: string[];
  /** Recommended use cases. */
  useCases: string[];
  /** Alternative names for this extractor. */
  aliases: string[];
  /** Whether this extractor requires an AI model. */
  requiresModel: boolean;
  /** Extraction types this extractor can produce. */
  extractionTypes: string[];
};

/** Retry configuration specific to RAG operations. */
export type RAGRetryConfig = {
  /** Maximum number of retry attempts (default: 3). */
  maxRetries: number;
  /** Initial delay in ms (default: 1000). */
  initialDelay: number;
  /** Maximum delay in ms (default: 30000). */
  maxDelay: number;
  /** Backoff multiplier (default: 2). */
  backoffMultiplier: number;
  /** Whether to add jitter (default: true). */
  jitter: boolean;
  /**
   * Custom predicate deciding whether an error is retryable.
   *
   * Note: In `isRetryable()`, this callback is invoked *before* the built-in
   * abort-error check. If you provide a custom `shouldRetry`, it should
   * explicitly handle abort errors (e.g. return `false` for them) when
   * cancellation correctness is required. Otherwise an aborted operation
   * could be retried instead of propagating immediately.
   */
  shouldRetry?: (error: Error) => boolean;
  /** Error codes that are retryable. */
  retryableErrorCodes?: string[];
  /** HTTP status codes that are retryable. */
  retryableStatusCodes?: number[];
};

/** Configuration of the RAG circuit breaker. */
export type RAGCircuitBreakerConfig = {
  /** Number of failures before opening the circuit (default: 5). */
  failureThreshold: number;
  /** Time in ms before attempting a reset (default: 60000). */
  resetTimeout: number;
  /** Max calls allowed in half-open state (default: 3). */
  halfOpenMaxCalls: number;
  /** Operation timeout in ms (default: 30000). */
  operationTimeout: number;
  /** Minimum calls before calculating the failure rate (default: 10). */
  minimumCallsBeforeCalculation: number;
  /** Time window for statistics in ms (default: 300000 - 5 minutes). */
  statisticsWindowSize: number;
};

/** Statistics reported for the RAG circuit breaker. */
export type RAGCircuitBreakerStats = {
  state: CircuitState;
  totalCalls: number;
  successfulCalls: number;
  failedCalls: number;
  failureRate: number;
  windowCalls: number;
  lastStateChange: Date;
  nextRetryTime?: Date;
  halfOpenCalls: number;
  averageLatency: number;
  p95Latency: number;
};

/** Identifies an embedding model by provider and model name. */
export type EmbeddingModelConfig = {
  provider: string;
  modelName: string;
};

/** Identifies a generation model, with optional sampling settings. */
export type GenerationModelConfig = {
  provider: string;
  modelName: string;
  temperature?: number;
  maxTokens?: number;
};

/** Configuration of a RAG pipeline. */
export type RAGPipelineConfig = {
  /** Pipeline identifier. */
  id?: string;
  /** Vector store instance (defaults to in-memory). */
  vectorStore?: VectorStore;
  /** BM25 index for hybrid search (defaults to in-memory). */
  bm25Index?: BM25Index;
  /** Index name for the vector store. */
  indexName?: string;
  /** Embedding model configuration. */
  embeddingModel: EmbeddingModelConfig;
  /** Generation model configuration (for RAG responses). */
  generationModel?: GenerationModelConfig;
  /** Default chunking strategy. */
  defaultChunkingStrategy?: ChunkingStrategy;
  /** Default chunk size. */
  defaultChunkSize?: number;
  /** Default chunk overlap. */
  defaultChunkOverlap?: number;
  /** Enables hybrid search (vector + BM25). */
  enableHybridSearch?: boolean;
  /** Enables Graph RAG. */
  enableGraphRAG?: boolean;
  /** Graph RAG similarity threshold. */
  graphThreshold?: number;
  /** Default number of results to retrieve. */
  defaultTopK?: number;
  /** Enables reranking. */
  enableReranking?: boolean;
  /** Reranking model configuration. */
  rerankingModel?: EmbeddingModelConfig;
};

/** Per-call overrides for document ingestion. */
export type IngestOptions = {
  /** Chunking strategy override. */
  strategy?: ChunkingStrategy;
  /** Chunk size override. */
  chunkSize?: number;
  /** Chunk overlap override. */
  chunkOverlap?: number;
  /** Custom metadata to add. */
  metadata?: Record<string, unknown>;
  /** Extract metadata using an LLM. */
  extractMetadata?: boolean;
};

/** Options for a pipeline query. */
export type QueryOptions = {
  /** Number of chunks to retrieve. */
  topK?: number;
  /** Use hybrid search. */
  hybrid?: boolean;
  /** Use Graph RAG. */
  graph?: boolean;
  /** Enable reranking. */
  rerank?: boolean;
  /** Metadata filter. */
  filter?: Record<string, unknown>;
  /** Include sources in the response. */
  includeSources?: boolean;
  /** Generate a response (vs just retrieve). */
  generate?: boolean;
  /** Custom system prompt for generation. */
  systemPrompt?: string;
  /** Temperature for generation. */
  temperature?: number;
};

/** Response returned for a query. */
export type RAGResponse = {
  /** Generated answer (if generate=true). */
  answer?: string;
  /** Retrieved context chunks. */
  context: string;
  /** Source documents/chunks. */
  sources: {
    id: string;
    text: string;
    score: number;
    metadata?: Record<string, unknown>;
  }[];
  /** Query metadata. */
  metadata: {
    queryTime: number;
    retrievalMethod: string;
    chunksRetrieved: number;
    reranked: boolean;
  };
};

/** Statistics describing a pipeline. */
export type PipelineStats = {
  totalDocuments: number;
  totalChunks: number;
  indexName: string;
  embeddingDimension?: number;
  hybridSearchEnabled: boolean;
  graphRAGEnabled: boolean;
};

/** Reranker types that are supported. */
export type RerankerType =
  | "llm"
  | "cross-encoder"
  | "cohere"
  | "simple"
  | "batch";

/** Contract that every reranker implements. */
export type Reranker = {
  /** Identifier of the reranker type. */
  readonly type: RerankerType;
  /**
   * Reranks results by relevance to the query.
   *
   * @param results - Vector search results to rerank
   * @param query - Original search query
   * @param options - Reranking options
   * @returns Reranked results with scores
   */
  rerank(
    results: VectorQueryResult[],
    query: string,
    options?: RerankerOptions,
  ): Promise<RerankResult[]>;
};

/** Configuration of a reranker. */
export type RerankerConfig = {
  /** Which reranker type to use. */
  type: RerankerType;
  /** Model name for LLM-based rerankers. */
  model?:
    | string
    | {
        provider: string;
        modelName: string;
      };
  /** Provider for the model. */
  provider?: string;
  /** Number of results to return after reranking. */
  topK?: number;
  /** Scoring weights. */
  weights?: {
    semantic?: number;
    vector?: number;
    position?: number;
  };
  /** API key for external services (e.g., Cohere). */
  apiKey?: string;
};

/** Descriptive metadata about a reranker, for discovery and documentation. */
export type RerankerMetadata = {
  /** Human-readable description. */
  description: string;
  /** Default configuration. */
  defaultConfig: Partial<RerankerConfig>;
  /** Configuration options that are supported. */
  supportedOptions: string[];
  /** Recommended use cases. */
  useCases: string[];
  /** Alternative names for this reranker. */
  aliases: string[];
  /** Whether this reranker requires an AI model. */
  requiresModel: boolean;
  /** Whether this reranker requires an external API. */
  requiresExternalAPI: boolean;
};

/**
 * BM25 index contract.
 * Implementations should provide sparse retrieval capabilities.
 */
export type BM25Index = {
  /**
   * Searches documents using the BM25 algorithm.
   *
   * @param query - Search query string
   * @param topK - Number of results to return
   * @returns The BM25 results
   */
  search(query: string, topK?: number): Promise<BM25Result[]>;
  /**
   * Adds documents to the index.
   *
   * @param documents - Documents to index
   */
  addDocuments(
    documents: {
      id: string;
      text: string;
      metadata?: Record<string, unknown>;
    }[],
  ): Promise<void>;
};

/** Hybrid search configuration used when creating a search function. */
export type HybridSearchOptions = {
  /** Vector store instance. */
  vectorStore: VectorStore;
  /** BM25 index instance. */
  bm25Index: BM25Index;
  /** Index name for the vector store. */
  indexName: string;
  /** Embedding model configuration (optional - uses defaults from ProviderFactory if not specified). */
  embeddingModel?: {
    provider?: string;
    modelName?: string;
  };
  /** Default search configuration. */
  defaultConfig?: HybridSearchConfig;
};

/**
 * Abstract vector store contract.
 * Vector stores should implement this type to work with the query tool.
 */
export type VectorStore = {
  query(params: {
    indexName: string;
    queryVector: number[];
    topK?: number;
    filter?: MetadataFilter;
    includeVectors?: boolean;
  }): Promise<VectorQueryResult[]>;
};

/** Options shared by all document loaders. */
export type LoaderOptions = {
  /** Custom metadata to add to the document. */
  metadata?: Record<string, unknown>;
  /** Encoding for text files. */
  encoding?: BufferEncoding;
  /** Document type override. */
  type?: DocumentType;
};

/** Options for the web loader. */
export type WebLoaderOptions = LoaderOptions & {
  /** Request timeout in milliseconds. */
  timeout?: number;
  /** Custom headers for the request. */
  headers?: Record<string, string>;
  /** Extract only main content (remove navigation, ads, etc.). */
  extractMainContent?: boolean;
  /** Selector for main content (CSS selector). */
  contentSelector?: string;
  /** User agent string. */
  userAgent?: string;
};

/** Options for the PDF loader. */
export type PDFLoaderOptions = LoaderOptions & {
  /** Page range to extract (e.g., "1-5" or "1,3,5"). */
  pageRange?: string;
  /** Extract images as base64. */
  extractImages?: boolean;
  /** OCR for scanned documents. */
  enableOCR?: boolean;
  /** Preserve layout formatting. */
  preserveLayout?: boolean;
};

/** Options for the CSV loader. */
export type CSVLoaderOptions = LoaderOptions & {
  /** Delimiter character. */
  delimiter?: string;
  /** Whether the first row is a header. */
  hasHeader?: boolean;
  /** Column names (if no header). */
  columns?: string[];
  /** Output format. */
  outputFormat?: "text" | "json" | "markdown";
};

/** Abstract document loader contract. */
export type DocumentLoader = {
  /**
   * Loads a document from a source.
   *
   * @param source - File path, URL, or content
   * @param options - Loader options
   * @returns Promise resolving to an MDocument
   */
  load(
    source: string,
    options?: LoaderOptions,
  ): Promise<import("../rag/document/MDocument.js").MDocument>;
  /**
   * Checks whether this loader can handle the source.
   *
   * @param source - File path, URL, or content
   * @returns True if the loader can handle the source
   */
  canHandle(source: string): boolean;
};

/** Circuit breaker state. */
export type CircuitState =
  | "closed"
  | "open"
  | "half-open";

/** Event map for the RAG circuit breaker; each entry is the event's argument tuple. */
export type RAGCircuitBreakerEvents = {
  stateChange: [
    {
      oldState: CircuitState;
      newState: CircuitState;
      reason: string;
      timestamp: Date;
    },
  ];
  callSuccess: [
    {
      duration: number;
      timestamp: Date;
      operationType?: string;
    },
  ];
  callFailure: [
    {
      error: string;
      duration: number;
      timestamp: Date;
      operationType?: string;
    },
  ];
  circuitOpen: [
    {
      failureRate: number;
      totalCalls: number;
      timestamp: Date;
    },
  ];
  circuitHalfOpen: [
    {
      timestamp: Date;
    },
  ];
  circuitClosed: [
    {
      timestamp: Date;
    },
  ];
};

import type { Tool } from "ai";

/** Prepared RAG tool ready for injection into generate/stream. */
export type RAGPreparedTool = {
  /** The tool to inject into the tools Record. */
  tool: Tool;
  /** Tool name (key for the tools Record). */
  toolName: string;
  /** Number of chunks indexed. */
  chunksIndexed: number;
  /** Number of files loaded. */
  filesLoaded: number;
};

/**
 * RAG configuration for the generate() and stream() APIs.
 *
 * When provided, NeuroLink automatically:
 * 1. Loads the specified files
 * 2. Chunks them using the selected strategy
 * 3. Generates embeddings
 * 4. Stores them in an in-memory vector store
 * 5. Creates a search tool the AI can invoke on demand
 *
 * @example
 * ```typescript
 * const result = await neurolink.generate({
 *   input: { text: "What is RAG?" },
 *   provider: "vertex",
 *   rag: {
 *     files: ["./docs/guide.md", "./docs/api.md"],
 *     strategy: "markdown",
 *     chunkSize: 512,
 *     topK: 5,
 *   }
 * });
 * ```
 */
export type RAGConfig = {
  /** File paths to load and index for retrieval. */
  files: string[];
  /**
   * Chunking strategy to use. If not specified, auto-detected from file extension.
   * @default "recursive"
   */
  strategy?: ChunkingStrategy;
  /**
   * Maximum chunk size in characters.
   * @default 1000
   */
  chunkSize?: number;
  /**
   * Overlap between adjacent chunks in characters.
   * @default 200
   */
  chunkOverlap?: number;
  /**
   * Number of top results to retrieve per query.
   * @default 5
   */
  topK?: number;
  /**
   * Tool name visible to the AI model.
   * @default "search_knowledge_base"
   */
  toolName?: string;
  /**
   * Tool description for the AI model explaining what the knowledge base contains.
   * @default "Search the loaded documents for relevant information to answer the user's question"
   */
  toolDescription?: string;
  /**
   * Embedding model provider for generating embeddings.
   * Defaults to the same provider used for generation.
   */
  embeddingProvider?: string;
  /**
   * Embedding model name.
   * Defaults to the provider's default embedding model.
   */
  embeddingModel?: string;
};

/** Document types supported for processing. */
export type DocumentType =
  | "text"
  | "markdown"
  | "html"
  | "json"
  | "latex"
  | "csv"
  | "pdf";

/** Metadata tracking a chunk's source and position. */
export type ChunkMetadata = {
  /** Source document identifier. */
  documentId: string;
  /** Original document filename or URL. */
  source?: string;
  /** Position in the original document (0-indexed). */
  chunkIndex: number;
  /** Total number of chunks from the document. */
  totalChunks?: number;
  /** Start character position in the original text. */
  startPosition?: number;
  /** End character position in the original text. */
  endPosition?: number;
  /** Document type (markdown, html, json, etc.). */
  documentType?: DocumentType;
  /** Custom metadata from extraction. */
  custom?: Record<string, unknown>;
  /** Extracted title (from metadata extraction). */
  title?: string;
  /** Extracted summary (from metadata extraction). */
  summary?: string;
  /** Extracted keywords (from metadata extraction). */
  keywords?: string[];
  /** Header level for markdown/html chunks. */
  headerLevel?: number;
  /** Header text for structured documents. */
  header?: string;
  /** JSON path for JSON chunks. */
  jsonPath?: string;
  /** LaTeX environment name. */
  latexEnvironment?: string;
};

/** A chunk: its text plus the metadata describing it. */
export type Chunk = {
  /** Unique identifier for the chunk. */
  id: string;
  /** The text content of the chunk. */
  text: string;
  /** Metadata associated with the chunk. */
  metadata: ChunkMetadata;
  /** Optional embedding vector (populated after embedding). */
  embedding?: number[];
};

/** Chunking strategy types that are available. */
export type ChunkingStrategy =
  | "character"
  | "recursive"
  | "sentence"
  | "token"
  | "markdown"
  | "html"
  | "json"
  | "latex"
  | "semantic"
  | "semantic-markdown";

/** Outcome of validating a chunker configuration. */
export type ChunkerValidationResult = {
  valid: boolean;
  errors: string[];
  warnings: string[];
};

/** Configuration fields common to all chunkers. */
export type BaseChunkerConfig = {
  /** Maximum chunk size (interpretation varies by strategy). */
  maxSize?: number;
  /** Minimum chunk size. */
  minSize?: number;
  /** Overlap between consecutive chunks. */
  overlap?: number;
  /** Whether to trim whitespace from chunks. */
  trimWhitespace?: boolean;
  /** Custom metadata to add to all chunks. */
  metadata?: Record<string, unknown>;
  /** Whether to preserve metadata from the source document. */
  preserveMetadata?: boolean;
};

/**
 * Character chunker configuration.
 * Simple character-based splitting.
 */
export type CharacterChunkerConfig = BaseChunkerConfig & {
  /** Character separator (default: ""). */
  separator?: string;
  /** Keep the separator in chunks. */
  keepSeparator?: boolean;
};

/**
 * Recursive chunker configuration.
 * Smart splitting based on content structure.
 */
export type RecursiveChunkerConfig = BaseChunkerConfig & {
  /** Ordered list of separators to try (default: ["\n\n", "\n", " ", ""]). */
  separators?: string[];
  /** Whether separators are regex patterns. */
  isSeparatorRegex?: boolean;
  /** Whether to keep separators in the output chunks. */
  keepSeparators?: boolean;
};

/**
 * Sentence chunker configuration.
 * Sentence-aware splitting.
 */
export type SentenceChunkerConfig = BaseChunkerConfig & {
  /** Sentence ending characters (default: [".", "!", "?", "\n"]). */
  sentenceEnders?: string[];
  /** Minimum sentences per chunk. */
  minSentences?: number;
  /** Maximum sentences per chunk. */
  maxSentences?: number;
};

/**
 * Token chunker configuration.
 * Token-aware splitting using a tokenizer.
 */
export type TokenChunkerConfig = BaseChunkerConfig & {
  /** Tokenizer to use (default: "cl100k_base" for GPT models). */
  tokenizer?: string;
  /** Model name for token counting (alternative to tokenizer). */
  modelName?: string;
  /** Maximum tokens per chunk. */
  maxTokens?: number;
  /** Token overlap between chunks. */
  tokenOverlap?: number;
};

/**
 * Markdown chunker configuration.
 * Structure-aware markdown splitting.
 */
export type MarkdownChunkerConfig = BaseChunkerConfig & {
  /** Header levels to split on (default: [1, 2, 3]). */
  headerLevels?: number[];
  /** Include code blocks as single chunks. */
  preserveCodeBlocks?: boolean;
  /** Include the header in the chunk content. */
  includeHeader?: boolean;
  /** Strip markdown formatting from output. */
  stripFormatting?: boolean;
};

/**
 * HTML chunker configuration.
 * HTML structure-aware splitting.
 */
export type HTMLChunkerConfig = BaseChunkerConfig & {
  /** Tags to split on (default: ["div", "p", "section", "article"]). */
  splitTags?: string[];
  /** Tags to preserve as single chunks. */
  preserveTags?: string[];
  /** Extract text only (strip HTML tags). */
  extractTextOnly?: boolean;
  /** Include tag metadata in chunks. */
  includeTagMetadata?: boolean;
};

/**
 * JSON chunker configuration.
 * JSON structure-aware splitting.
 */
export type JSONChunkerConfig = BaseChunkerConfig & {
  /** Maximum depth to traverse. */
  maxDepth?: number;
  /** Keys to split on (arrays/objects at these keys become chunks). */
  splitKeys?: string[];
  /** Keys to preserve as single units. */
  preserveKeys?: string[];
  /** Include the JSON path in metadata. */
  includeJsonPath?: boolean;
};

/**
 * LaTeX chunker configuration.
 * LaTeX structure-aware splitting.
 */
export type LaTeXChunkerConfig = BaseChunkerConfig & {
  /** Environments to split on (default: ["section", "subsection", "chapter"]). */
  splitEnvironments?: string[];
  /** Preserve math environments as single chunks. */
  preserveMath?: boolean;
  /** Include the preamble as a separate chunk. */
  includePreamble?: boolean;
};

/**
 * Semantic chunker configuration.
 * LLM-based semantic splitting.
 */
export type SemanticChunkerConfig = BaseChunkerConfig & {
  /** Minimum tokens before considering a split. */
  joinThreshold?: number;
  /** Model for semantic analysis. */
  modelName?: string;
  /** Provider for the model. */
  provider?: string;
  /** Custom prompt for semantic grouping. */
  semanticPrompt?: string;
  /** Maximum header depth to consider for grouping. */
  maxHeaderDepth?: number;
  /** Similarity threshold for grouping (0-1). */
  similarityThreshold?: number;
};

/** Union of all chunker configurations. */
export type ChunkerConfig =
  | CharacterChunkerConfig
  | RecursiveChunkerConfig
  | SentenceChunkerConfig
  | TokenChunkerConfig
  | MarkdownChunkerConfig
  | HTMLChunkerConfig
  | JSONChunkerConfig
  | LaTeXChunkerConfig
  | SemanticChunkerConfig;

/** Chunker metadata used for factory registration. */
export type ChunkerMetadata = {
  /** Human-readable description. */
  description: string;
  /** Supported document types. */
  supportedTypes?: DocumentType[];
  /** Whether the chunker requires external dependencies. */
  requiresExternalDeps?: boolean;
  /** Default configuration (can be any chunker-specific config). */
  defaultConfig?: Record<string, unknown>;
  /** Configuration options that are supported. */
  supportedOptions?: string[];
  /** Use cases where this chunker excels. */
  useCases?: string[];
  /** Alternative names/aliases for this chunker. */
  aliases?: string[];
};

/** Metadata extraction types. */
export type ExtractorType =
  | "title"
  | "summary"
  | "keywords"
  | "questions"
  | "custom";

/** Configuration fields common to all metadata extractors. */
export type BaseExtractorConfig = {
  /** Language model to use for extraction. */
  modelName?: string;
  /** Provider for the model. */
  provider?: string;
  /** Custom prompt template. */
  promptTemplate?: string;
  /** Token limit for the LLM response. */
  maxTokens?: number;
  /** Temperature used for LLM generation. */
  temperature?: number;
};

/** Title extractor configuration. */
export type TitleExtractorConfig = BaseExtractorConfig & {
  /** Number of nodes to use for title extraction. */
  nodes?: number;
  /** Template for processing individual nodes. */
  nodeTemplate?: string;
  /** Template for combining node results. */
  combineTemplate?: string;
};

/** Summary extractor configuration. */
export type SummaryExtractorConfig = BaseExtractorConfig & {
  /** Summary types to generate. */
  summaryTypes?: Array<"current" | "previous" | "next">;
  /** Maximum summary length in words. */
  maxWords?: number;
};

/** Keyword extractor configuration. */
export type KeywordExtractorConfig = BaseExtractorConfig & {
  /** Maximum number of keywords to extract. */
  maxKeywords?: number;
  /** Minimum keyword relevance score (0-1). */
  minRelevance?: number;
};

/** Question-Answer extractor configuration. */
export type QuestionExtractorConfig = BaseExtractorConfig & {
  /** Number of Q&A pairs to generate. */
  numQuestions?: number;
  /** Include answers in the output. */
  includeAnswers?: boolean;
  /** Generate embedding-only questions (shorter, more focused). */
  embeddingOnly?: boolean;
};

/** Custom schema extractor configuration. */
export type CustomSchemaExtractorConfig = BaseExtractorConfig & {
  /** Zod schema for structured extraction. */
  schema: unknown;
  /** Description of what to extract. */
  description?: string;
};

/** Combined extraction parameters. */
export type ExtractParams = {
  /** Extract the document title. */
  title?: boolean | TitleExtractorConfig;
  /** Extract the document summary. */
  summary?: boolean | SummaryExtractorConfig;
  /** Extract keywords. */
  keywords?: boolean | KeywordExtractorConfig;
  /** Generate Q&A pairs. */
  questions?: boolean | QuestionExtractorConfig;
  /** Custom schema extraction. */
  custom?: CustomSchemaExtractorConfig;
};

/** Extraction result for a single chunk. */
export type ExtractionResult = {
  /** Extracted title. */
  title?: string;
  /** Extracted summary. */
  summary?: string;
  /** Extracted keywords. */
  keywords?: string[];
  /** Generated Q&A pairs. */
  questions?: {
    question: string;
    answer?: string;
  }[];
  /** Custom schema extraction result. */
  custom?: Record<string, unknown>;
};

/** Request context for dynamic configuration. */
export type RequestContext = {
  userId?: string;
  tenantId?: string;
  environment?: string;
  custom?: Record<string, unknown>;
};

/** Metadata filter using MongoDB/Sift query syntax. */
export type MetadataFilter = {
  $eq?: unknown;
  $ne?: unknown;
  $gt?: number;
  $gte?: number;
  $lt?: number;
  $lte?: number;
  $in?: unknown[];
  $nin?: unknown[];
  $and?: MetadataFilter[];
  $or?: MetadataFilter[];
  $not?: MetadataFilter;
  $nor?: MetadataFilter[];
  $exists?: boolean;
  $contains?: string;
  $regex?: string;
  $size?: number;
  [field: string]: unknown;
};

/** A single result returned by a vector store query. */
export type VectorQueryResult = {
  /** Unique identifier. */
  id: string;
  /** Text content. */
  text?: string;
  /** Similarity/relevance score. */
  score?: number;
  /** Associated metadata. */
  metadata?: Record<string, unknown>;
  /** Embedding vector (if requested). */
  vector?: number[];
};

/** Provider-specific query options. */
export type VectorProviderOptions = {
  /** Pinecone options. */
  pinecone?: {
    namespace?: string;
    sparseVector?: number[];
  };
  /** pgVector options. */
  pgVector?: {
    minScore?: number;
    ef?: number;
    probes?: number;
  };
  /** Chroma options. */
  chroma?: {
    where?: Record<string, unknown>;
    whereDocument?: Record<string, unknown>;
  };
};

/** Configuration of the vector query tool. */
export type VectorQueryToolConfig = {
  /** Tool identifier. */
  id?: string;
  /** Tool description for AI agents. */
  description?: string;
  /** Index name within the vector store. */
  indexName: string;
  /** Embedding model specification. */
  embeddingModel: {
    provider: string;
    modelName: string;
  };
  /** Enable metadata filtering. */
  enableFilter?: boolean;
  /** Include embedding vectors in results. */
  includeVectors?: boolean;
  /** Include full source objects in results. */
  includeSources?: boolean;
  /** Number of results to return. */
  topK?: number;
  /** Reranker configuration. */
  reranker?: RerankerConfig;
  /** Provider-specific options. */
  providerOptions?: VectorProviderOptions;
};

/** Wrapper around the results of a vector query. */
export type VectorQueryResponse = {
  /** Formatted relevant context string. */
  relevantContext: string;
  /** Source query results. */
  sources: VectorQueryResult[];
  /** Total results found. */
  totalResults: number;
  /** Query metadata. */
  metadata: {
    queryTime: number;
    reranked: boolean;
    filtered: boolean;
  };
};

/** A single BM25 search result. */
export type BM25Result = {
  /** Document ID. */
  id: string;
  /** BM25 score. */
  score: number;
  /** Document text. */
  text: string;
  /** Associated metadata. */
  metadata?: Record<string, unknown>;
};

/** Hybrid search configuration. */
export type HybridSearchConfig = {
  /** Weight for vector search (0-1). */
  vectorWeight?: number;
  /** Weight for BM25 search (0-1). */
  bm25Weight?: number;
  /** Fusion method. */
  fusionMethod?: "rrf" | "linear";
  /** RRF k parameter. */
  rrfK?: number;
  /** Number of results to return. */
  topK?: number;
  /** Enable reranking. */
  enableReranking?: boolean;
  /** Reranker configuration. */
  reranker?: RerankerConfig;
};

/**
 * Hybrid search result
 */
export type HybridSearchResult = {
  /** Document ID. */
  id: string;
  /** Combined score. */
  score: number;
  /** Document text. */
  text: string;
  /** Associated metadata. */
  metadata?: Record<string, unknown>;
  /** Score breakdown. */
  scores?: {
    vector?: number;
    bm25?: number;
    combined?: number;
    reranked?: number;
  };
};

/** Graph node representing a document chunk. */
export type GraphNode = {
  /** Unique node identifier. */
  id: string;
  /** Text content of the node. */
  content: string;
  /** Node metadata. */
  metadata: Record<string, unknown>;
  /** Embedding vector. */
  embedding?: number[];
};

/** Graph edge representing a semantic relationship. */
export type GraphEdge = {
  /** Source node ID. */
  source: string;
  /** Target node ID. */
  target: string;
  /** Edge weight (similarity score). */
  weight: number;
  /** Edge type. */
  type?: string;
};

/** Chunk input for graph creation. */
export type GraphChunk = {
  /** Chunk text content. */
  text: string;
  /** Chunk metadata. */
  metadata?: Record<string, unknown>;
};

/** Embedding input for graph creation. */
export type GraphEmbedding = {
  /** Embedding vector. */
  vector: number[];
};

/** Ranked node returned from a graph query. */
export type RankedNode = {
  /** Node ID. */
  id: string;
  /** Node content. */
  content: string;
  /** Node metadata. */
  metadata: Record<string, unknown>;
  /** Relevance score. */
  score: number;
};

/** Graph RAG configuration. */
export type GraphRAGConfig = {
  /** Embedding vector dimension (default: 1536). */
  dimension?: number;
  /** Similarity threshold for edge creation (default: 0.7). */
  threshold?: number;
};

/** Parameters of a graph query. */
export type GraphQueryParams = {
  /** Query embedding vector. */
  query: number[];
  /** Number of results to return (default: 10). */
  topK?: number;
  /** Random walk steps (default: 100). */
  randomWalkSteps?: number;
  /** Restart probability for the random walk (default: 0.15). */
  restartProb?: number;
};

/** Statistics describing a graph. */
export type GraphStats = {
  nodeCount: number;
  edgeCount: number;
  avgDegree: number;
  threshold: number;
};

/** Options accepted by a reranker call. */
export type RerankerOptions = {
  /** Pre-computed query embedding. */
  queryEmbedding?: number[];
  /** Number of results to return after reranking. */
  topK?: number;
  /** Scoring weights (must sum to 1.0). */
  weights?: {
    semantic?: number;
    vector?: number;
    position?: number;
  };
};

/** Reranked result with detailed scoring. */
export type RerankResult = {
  /** Original query result. */
  result: VectorQueryResult;
  /** Combined reranking score (0-1). */
  score: number;
  /** Detailed score breakdown. */
  details: {
    semantic: number;
    vector: number;
    position: number;
    queryAnalysis?: string;
  };
};

/** MDocument configuration. */
export type MDocumentConfig = {
  /** Document type. */
  type: DocumentType;
  /** Custom metadata. */
  metadata?: Record<string, unknown>;
};

/** Chunk parameters for MDocument. */
export type ChunkParams = {
  /** Chunking strategy to use. */
  strategy?: ChunkingStrategy;
  /** Strategy-specific configuration. */
  config?: ChunkerConfig;
  /** Metadata extraction options. */
  extract?: ExtractParams;
};

/** Arguments of the RAG CLI command. */
export type RAGCommandArgs = {
  /** Input file path. */
  file?: string;
  /** Query string. */
  query?: string;
  /** Chunking strategy. */
  strategy?: ChunkingStrategy;
  /** Maximum chunk size. */
  maxSize?: number;
  /** Chunk overlap. */
  overlap?: number;
  /** Output format. */
  format?: "json" | "text" | "table";
  /** Enable verbose output. */
  verbose?: boolean;
  /** Provider for embeddings. */
  provider?: string;
  /** Model for embeddings. */
  model?: string;
  /** Number of results. */
  topK?: number;
  /** Index name. */
  index?: string;
  /** Enable hybrid search. */
  hybrid?: boolean;
  /** Use Graph RAG. */
  graph?: boolean;
};

/** Options for the recursive JSON chunk extractor. */
export type ExtractChunksOptions = {
  data: unknown;
  path: string;
  depth: number;
  maxDepth: number;
  maxSize: number;
  splitKeys: string[];
  preserveKeys: string[];
  includeJsonPath: boolean;
};

/** Document processing state held by MDocument. */
export type DocumentState = {
  content: string;
  type: DocumentType;
  metadata: Record<string, unknown>;
  chunks: Chunk[];
  embeddings: number[][];
  history: string[];
};

/** Canonical RAG error code. */
export type RAGErrorCode =
  | "RAG_CHUNKING_ERROR"
  | "RAG_CHUNKING_INVALID_CONFIG"
  | "RAG_CHUNKING_STRATEGY_NOT_FOUND"
  | "RAG_CHUNKING_EMPTY_CONTENT"
  | "RAG_CHUNKING_SIZE_EXCEEDED"
  | "RAG_METADATA_EXTRACTION_ERROR"
  | "RAG_METADATA_EXTRACTION_TIMEOUT"
  | "RAG_METADATA_SCHEMA_INVALID"
  | "RAG_METADATA_EXTRACTOR_NOT_FOUND"
  | "RAG_EMBEDDING_ERROR"
  | "RAG_EMBEDDING_DIMENSION_MISMATCH"
  | "RAG_EMBEDDING_RATE_LIMIT"
  | "RAG_EMBEDDING_PROVIDER_ERROR"
  | "RAG_VECTOR_QUERY_ERROR"
  | "RAG_VECTOR_QUERY_TIMEOUT"
  | "RAG_VECTOR_STORE_UNAVAILABLE"
  | "RAG_VECTOR_STORE_CONNECTION_ERROR"
  | "RAG_VECTOR_INDEX_NOT_FOUND"
  | "RAG_RERANKER_ERROR"
  | "RAG_RERANKER_NOT_FOUND"
  | "RAG_RERANKER_API_ERROR"
  | "RAG_GRAPH_ERROR"
  | "RAG_GRAPH_TRAVERSAL_ERROR"
  | "RAG_GRAPH_NODE_NOT_FOUND"
  | "RAG_PIPELINE_ERROR"
  | "RAG_PIPELINE_STAGE_FAILED"
  | "RAG_PIPELINE_PARTIAL_FAILURE"
  | "RAG_CIRCUIT_BREAKER_OPEN"
  | "RAG_CIRCUIT_BREAKER_HALF_OPEN_LIMIT"
  | "RAG_OPERATION_TIMEOUT"
  | "RAG_RETRY_EXHAUSTED"
  | "RAG_INVALID_CONFIGURATION";