@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio
1,296 lines (1,295 loc) • 37.4 kB
TypeScript
/**
* RAG Type Definitions
*
* Canonical type file for all RAG (Retrieval-Augmented Generation) interfaces.
* All exported interfaces from src/lib/rag/ are collected here as type aliases.
*/
/**
 * Available citation formats.
 */
export type CitationFormat =
  | "inline"
  | "footnote"
  | "numbered"
  | "none";
/**
 * Contract implemented by every chunking strategy.
 */
export type Chunker = {
  /** Name of the strategy, used for identification. */
  readonly strategy: ChunkingStrategy;

  /**
   * Splits text into chunks.
   *
   * @param text - Text to split
   * @param config - Optional strategy-specific configuration
   * @returns Promise resolving to the array of chunks
   */
  chunk(text: string, config?: BaseChunkerConfig): Promise<Chunk[]>;
};
/**
 * Options controlling how retrieved chunks are assembled into context.
 */
export type ContextAssemblyOptions = {
  /** Upper bound on the assembled context, in characters. */
  maxChars?: number;
  /** Upper bound in tokens (approximate; 4 characters per token). */
  maxTokens?: number;
  /** Which citation format to apply. */
  citationFormat?: CitationFormat;
  /** String placed between chunks. */
  separator?: string;
  /** Whether chunk metadata is included in the context. */
  includeMetadata?: boolean;
  /** Whether overlapping content is deduplicated. */
  deduplicate?: boolean;
  /** Similarity threshold used for deduplication (0-1). */
  dedupeThreshold?: number;
  /** Whether chunks are ordered by relevance score. */
  orderByRelevance?: boolean;
  /** Whether section headers are included. */
  includeSectionHeaders?: boolean;
  /** Header template; supports the {index}, {source} and {score} placeholders. */
  headerTemplate?: string;
};
/**
 * An assembled context window together with its size accounting.
 */
export type ContextWindow = {
  /** The assembled context text. */
  text: string;
  /** How many chunks were included. */
  chunkCount: number;
  /** Total number of characters. */
  charCount: number;
  /** Estimated number of tokens. */
  tokenCount: number;
  /** How many chunks were truncated or excluded. */
  truncatedChunks: number;
  /** Citation map from id to citation text. */
  citations: Map<string, string>;
};
/**
 * Metadata extractor types that are supported.
 */
export type MetadataExtractorType =
  | "llm"
  | "title"
  | "summary"
  | "keywords"
  | "questions"
  | "custom"
  | "composite";
/**
 * Contract implemented by every metadata extractor.
 */
export type MetadataExtractor = {
  /** Identifier of the extractor type. */
  readonly type: MetadataExtractorType;

  /**
   * Extracts metadata from chunks.
   *
   * @param chunks - Chunks to extract metadata from
   * @param params - Optional extraction parameters
   * @returns Promise resolving to the array of extraction results
   */
  extract(chunks: Chunk[], params?: ExtractParams): Promise<ExtractionResult[]>;
};
/**
 * Configuration for a metadata extractor.
 */
export type MetadataExtractorConfig = {
  /** Which extractor type to use. */
  type: MetadataExtractorType;
  /** Provider of the language model. */
  provider?: string;
  /** Name of the model used for LLM-based extraction. */
  modelName?: string;
  /** Custom template for the prompt. */
  promptTemplate?: string;
  /** Token limit for the LLM response. */
  maxTokens?: number;
  /** Temperature used for LLM generation. */
  temperature?: number;
};
/**
 * Descriptive metadata about a metadata extractor, for discovery and documentation.
 */
export type MetadataExtractorMetadata = {
  /** Description meant for human readers. */
  description: string;
  /** The default configuration. */
  defaultConfig: Partial<MetadataExtractorConfig>;
  /** Configuration options that are supported. */
  supportedOptions: Array<string>;
  /** Use cases for which the extractor is recommended. */
  useCases: Array<string>;
  /** Other names this extractor is known by. */
  aliases: Array<string>;
  /** True when the extractor needs an AI model. */
  requiresModel: boolean;
  /** Extraction types the extractor is able to produce. */
  extractionTypes: Array<string>;
};
/**
 * Retry configuration specific to RAG operations.
 */
export type RAGRetryConfig = {
  /** Maximum retry attempts (default: 3). */
  maxRetries: number;
  /** Initial delay, in milliseconds (default: 1000). */
  initialDelay: number;
  /** Maximum delay, in milliseconds (default: 30000). */
  maxDelay: number;
  /** Multiplier applied for backoff (default: 2). */
  backoffMultiplier: number;
  /** Whether jitter is added (default: true). */
  jitter: boolean;
  /**
   * Optional custom predicate deciding whether an error is retryable.
   *
   * Note: `isRetryable()` invokes this callback *before* its built-in
   * abort-error check. A custom `shouldRetry` should therefore handle abort
   * errors explicitly (e.g. return `false` for them) when cancellation
   * correctness is required; otherwise an aborted operation could be retried
   * instead of propagating immediately.
   */
  shouldRetry?: (error: Error) => boolean;
  /** Error codes considered retryable. */
  retryableErrorCodes?: Array<string>;
  /** HTTP status codes considered retryable. */
  retryableStatusCodes?: Array<number>;
};
/**
 * Configuration for the circuit breaker.
 */
export type RAGCircuitBreakerConfig = {
  /** Failure count at which the circuit opens (default: 5). */
  failureThreshold: number;
  /** Milliseconds to wait before a reset is attempted (default: 60000). */
  resetTimeout: number;
  /** Maximum calls permitted while half-open (default: 3). */
  halfOpenMaxCalls: number;
  /** Timeout for an operation, in milliseconds (default: 30000). */
  operationTimeout: number;
  /** Minimum number of calls before the failure rate is calculated (default: 10). */
  minimumCallsBeforeCalculation: number;
  /** Statistics time window in milliseconds (default: 300000, i.e. 5 minutes). */
  statisticsWindowSize: number;
};
/**
 * Statistics reported for the circuit breaker.
 */
export type RAGCircuitBreakerStats = {
  state: CircuitState;

  totalCalls: number;
  successfulCalls: number;
  failedCalls: number;
  failureRate: number;
  windowCalls: number;

  lastStateChange: Date;
  nextRetryTime?: Date;
  halfOpenCalls: number;

  averageLatency: number;
  p95Latency: number;
};
/**
 * Configuration identifying an embedding model.
 */
export type EmbeddingModelConfig = {
  provider: string;
  modelName: string;
};
/**
 * Configuration identifying a generation model.
 */
export type GenerationModelConfig = {
  provider: string;
  modelName: string;
  temperature?: number;
  maxTokens?: number;
};
/**
 * Configuration for a RAG pipeline.
 */
export type RAGPipelineConfig = {
  /** Identifier of the pipeline. */
  id?: string;
  /** Vector store instance (in-memory by default). */
  vectorStore?: VectorStore;
  /** BM25 index used for hybrid search (in-memory by default). */
  bm25Index?: BM25Index;
  /** Name of the index in the vector store. */
  indexName?: string;
  /** Embedding model configuration (required). */
  embeddingModel: EmbeddingModelConfig;
  /** Generation model configuration, used for RAG responses. */
  generationModel?: GenerationModelConfig;
  /** Chunking strategy used by default. */
  defaultChunkingStrategy?: ChunkingStrategy;
  /** Chunk size used by default. */
  defaultChunkSize?: number;
  /** Chunk overlap used by default. */
  defaultChunkOverlap?: number;
  /** Turns on hybrid search (vector + BM25). */
  enableHybridSearch?: boolean;
  /** Turns on Graph RAG. */
  enableGraphRAG?: boolean;
  /** Similarity threshold for Graph RAG. */
  graphThreshold?: number;
  /** How many results to retrieve by default. */
  defaultTopK?: number;
  /** Turns on reranking. */
  enableReranking?: boolean;
  /** Configuration of the reranking model. */
  rerankingModel?: EmbeddingModelConfig;
};
/**
 * Options accepted when ingesting content.
 */
export type IngestOptions = {
  /** Overrides the chunking strategy. */
  strategy?: ChunkingStrategy;
  /** Overrides the chunk size. */
  chunkSize?: number;
  /** Overrides the chunk overlap. */
  chunkOverlap?: number;
  /** Additional custom metadata. */
  metadata?: Record<string, unknown>;
  /** Whether metadata is extracted using an LLM. */
  extractMetadata?: boolean;
};
/**
 * Options accepted when querying.
 */
export type QueryOptions = {
  /** How many chunks to retrieve. */
  topK?: number;
  /** Whether hybrid search is used. */
  hybrid?: boolean;
  /** Whether Graph RAG is used. */
  graph?: boolean;
  /** Whether reranking is enabled. */
  rerank?: boolean;
  /** Filter applied on metadata. */
  filter?: Record<string, unknown>;
  /** Whether sources are included in the response. */
  includeSources?: boolean;
  /** Whether a response is generated (as opposed to retrieval only). */
  generate?: boolean;
  /** Custom system prompt used for generation. */
  systemPrompt?: string;
  /** Temperature used for generation. */
  temperature?: number;
};
/**
 * Response returned for a query.
 */
export type RAGResponse = {
  /** The generated answer (present when generate=true). */
  answer?: string;
  /** Retrieved context, as a single string. */
  context: string;
  /** Source documents/chunks backing the response. */
  sources: {
    id: string;
    text: string;
    score: number;
    metadata?: Record<string, unknown>;
  }[];
  /** Metadata about the query. */
  metadata: {
    queryTime: number;
    retrievalMethod: string;
    chunksRetrieved: number;
    reranked: boolean;
  };
};
/**
 * Statistics describing a pipeline.
 */
export type PipelineStats = {
  totalDocuments: number;
  totalChunks: number;

  indexName: string;
  embeddingDimension?: number;

  hybridSearchEnabled: boolean;
  graphRAGEnabled: boolean;
};
/**
 * Reranker types that are supported.
 */
export type RerankerType =
  | "llm"
  | "cross-encoder"
  | "cohere"
  | "simple"
  | "batch";
/**
 * Contract implemented by every reranker.
 */
export type Reranker = {
  /** Identifier of the reranker type. */
  readonly type: RerankerType;

  /**
   * Reranks results according to their relevance to the query.
   *
   * @param results - Vector search results to be reranked
   * @param query - The original search query
   * @param options - Optional reranking options
   * @returns Promise resolving to the reranked results with scores
   */
  rerank(
    results: VectorQueryResult[],
    query: string,
    options?: RerankerOptions,
  ): Promise<RerankResult[]>;
};
/**
 * Configuration for a reranker.
 */
export type RerankerConfig = {
  /** Which reranker type to use. */
  type: RerankerType;
  /** Model for LLM-based rerankers: a name, or a provider/model pair. */
  model?:
    | string
    | {
        provider: string;
        modelName: string;
      };
  /** Provider of the model. */
  provider?: string;
  /** How many results are returned after reranking. */
  topK?: number;
  /** Weights applied when scoring. */
  weights?: {
    semantic?: number;
    vector?: number;
    position?: number;
  };
  /** API key for external services (e.g., Cohere). */
  apiKey?: string;
};
/**
 * Descriptive metadata about a reranker, for discovery and documentation.
 */
export type RerankerMetadata = {
  /** Description meant for human readers. */
  description: string;
  /** The default configuration. */
  defaultConfig: Partial<RerankerConfig>;
  /** Configuration options that are supported. */
  supportedOptions: Array<string>;
  /** Use cases for which the reranker is recommended. */
  useCases: Array<string>;
  /** Other names this reranker is known by. */
  aliases: Array<string>;
  /** True when the reranker needs an AI model. */
  requiresModel: boolean;
  /** True when the reranker needs an external API. */
  requiresExternalAPI: boolean;
};
/**
 * BM25 index contract.
 * Implementations are expected to provide sparse retrieval capabilities.
 */
export type BM25Index = {
  /**
   * Searches documents with the BM25 algorithm.
   *
   * @param query - The search query string
   * @param topK - How many results to return
   * @returns Promise resolving to the array of BM25 results
   */
  search(query: string, topK?: number): Promise<BM25Result[]>;

  /**
   * Adds documents to the index.
   *
   * @param documents - The documents to index
   */
  addDocuments(
    documents: {
      id: string;
      text: string;
      metadata?: Record<string, unknown>;
    }[],
  ): Promise<void>;
};
/**
 * Configuration used to create a hybrid search function.
 */
export type HybridSearchOptions = {
  /** The vector store instance. */
  vectorStore: VectorStore;
  /** The BM25 index instance. */
  bm25Index: BM25Index;
  /** Name of the index in the vector store. */
  indexName: string;
  /** Optional embedding model configuration; when omitted, defaults from ProviderFactory are used. */
  embeddingModel?: {
    provider?: string;
    modelName?: string;
  };
  /** Search configuration applied by default. */
  defaultConfig?: HybridSearchConfig;
};
/**
 * Abstract vector store contract.
 * A vector store implements this type in order to work with the query tool.
 */
export type VectorStore = {
  query(params: {
    indexName: string;
    queryVector: Array<number>;
    topK?: number;
    filter?: MetadataFilter;
    includeVectors?: boolean;
  }): Promise<VectorQueryResult[]>;
};
/**
 * Options common to document loaders.
 */
export type LoaderOptions = {
  /** Custom metadata attached to the document. */
  metadata?: Record<string, unknown>;
  /** Encoding used for text files. */
  encoding?: BufferEncoding;
  /** Overrides the document type. */
  type?: DocumentType;
};
/**
 * Options for the web loader.
 */
export type WebLoaderOptions = LoaderOptions & {
  /** Timeout of the request, in milliseconds. */
  timeout?: number;
  /** Custom headers sent with the request. */
  headers?: Record<string, string>;
  /** Whether only the main content is extracted (navigation, ads, etc. removed). */
  extractMainContent?: boolean;
  /** CSS selector identifying the main content. */
  contentSelector?: string;
  /** The user agent string. */
  userAgent?: string;
};
/**
 * Options for the PDF loader.
 */
export type PDFLoaderOptions = LoaderOptions & {
  /** Range of pages to extract (e.g., "1-5" or "1,3,5"). */
  pageRange?: string;
  /** Whether images are extracted as base64. */
  extractImages?: boolean;
  /** Whether OCR is enabled for scanned documents. */
  enableOCR?: boolean;
  /** Whether layout formatting is preserved. */
  preserveLayout?: boolean;
};
/**
 * Options for the CSV loader.
 */
export type CSVLoaderOptions = LoaderOptions & {
  /** The delimiter character. */
  delimiter?: string;
  /** Whether the first row is a header. */
  hasHeader?: boolean;
  /** Names of the columns (when there is no header). */
  columns?: Array<string>;
  /** Format of the output. */
  outputFormat?:
    | "text"
    | "json"
    | "markdown";
};
/**
 * Abstract document loader contract.
 */
export type DocumentLoader = {
  /**
   * Loads a document from a source.
   *
   * @param source - A file path, URL, or content
   * @param options - Optional loader options
   * @returns Promise that resolves to an MDocument
   */
  load(
    source: string,
    options?: LoaderOptions,
  ): Promise<import("../rag/document/MDocument.js").MDocument>;

  /**
   * Reports whether this loader can handle the source.
   *
   * @param source - A file path, URL, or content
   * @returns True when the loader can handle the source
   */
  canHandle(source: string): boolean;
};
/** State of the circuit breaker. */
export type CircuitState =
  | "closed"
  | "open"
  | "half-open";
/** Map from RAG circuit breaker event name to its single-element payload tuple. */
export type RAGCircuitBreakerEvents = {
  stateChange: [
    {
      oldState: CircuitState;
      newState: CircuitState;
      reason: string;
      timestamp: Date;
    },
  ];
  callSuccess: [
    {
      duration: number;
      timestamp: Date;
      operationType?: string;
    },
  ];
  callFailure: [
    {
      error: string;
      duration: number;
      timestamp: Date;
      operationType?: string;
    },
  ];
  circuitOpen: [
    {
      failureRate: number;
      totalCalls: number;
      timestamp: Date;
    },
  ];
  circuitHalfOpen: [{ timestamp: Date }];
  circuitClosed: [{ timestamp: Date }];
};
import type { Tool } from "ai";
/** A prepared RAG tool, ready to be injected into generate/stream. */
export type RAGPreparedTool = {
  /** Tool that gets injected into the tools Record. */
  tool: Tool;
  /** Name of the tool (the key in the tools Record). */
  toolName: string;
  /** How many chunks were indexed. */
  chunksIndexed: number;
  /** How many files were loaded. */
  filesLoaded: number;
};
/**
 * RAG configuration accepted by the generate() and stream() APIs.
 *
 * When it is provided, NeuroLink automatically:
 * 1. Loads the specified files
 * 2. Chunks them using the selected strategy
 * 3. Generates embeddings
 * 4. Stores in an in-memory vector store
 * 5. Creates a search tool the AI can invoke on demand
 *
 * @example
 * ```typescript
 * const result = await neurolink.generate({
 *   input: { text: "What is RAG?" },
 *   provider: "vertex",
 *   rag: {
 *     files: ["./docs/guide.md", "./docs/api.md"],
 *     strategy: "markdown",
 *     chunkSize: 512,
 *     topK: 5,
 *   }
 * });
 * ```
 */
export type RAGConfig = {
  /** Paths of the files to load and index for retrieval. */
  files: Array<string>;

  /**
   * Chunking strategy. Auto-detected from the file extension when not specified.
   * @default "recursive"
   */
  strategy?: ChunkingStrategy;

  /**
   * Maximum size of a chunk, in characters.
   * @default 1000
   */
  chunkSize?: number;

  /**
   * Overlap between adjacent chunks, in characters.
   * @default 200
   */
  chunkOverlap?: number;

  /**
   * How many top results are retrieved per query.
   * @default 5
   */
  topK?: number;

  /**
   * Name of the tool as seen by the AI model.
   * @default "search_knowledge_base"
   */
  toolName?: string;

  /**
   * Description of the tool, explaining to the AI model what the knowledge base contains.
   * @default "Search the loaded documents for relevant information to answer the user's question"
   */
  toolDescription?: string;

  /**
   * Provider of the embedding model used to generate embeddings.
   * Defaults to the same provider used for generation.
   */
  embeddingProvider?: string;

  /**
   * Name of the embedding model.
   * Defaults to the provider's default embedding model.
   */
  embeddingModel?: string;
};
/**
 * Document types that are supported for processing.
 */
export type DocumentType =
  | "text"
  | "markdown"
  | "html"
  | "json"
  | "latex"
  | "csv"
  | "pdf";
/**
 * Metadata attached to a chunk, tracking its source and position.
 */
export type ChunkMetadata = {
  /** Identifier of the source document. */
  documentId: string;
  /** Filename or URL of the original document. */
  source?: string;
  /** 0-indexed position within the original document. */
  chunkIndex: number;
  /** How many chunks the document produced in total. */
  totalChunks?: number;
  /** Character position in the original text where the chunk starts. */
  startPosition?: number;
  /** Character position in the original text where the chunk ends. */
  endPosition?: number;
  /** Type of the document (markdown, html, json, etc.). */
  documentType?: DocumentType;
  /** Custom metadata coming from extraction. */
  custom?: Record<string, unknown>;
  /** Title obtained from metadata extraction. */
  title?: string;
  /** Summary obtained from metadata extraction. */
  summary?: string;
  /** Keywords obtained from metadata extraction. */
  keywords?: Array<string>;
  /** Level of the header, for markdown/html chunks. */
  headerLevel?: number;
  /** Text of the header, for structured documents. */
  header?: string;
  /** JSON path, for JSON chunks. */
  jsonPath?: string;
  /** Name of the LaTeX environment. */
  latexEnvironment?: string;
};
/**
 * Basic chunk result: text plus metadata.
 */
export type Chunk = {
  /** Identifier that is unique to the chunk. */
  id: string;
  /** Text content held by the chunk. */
  text: string;
  /** Metadata that belongs to the chunk. */
  metadata: ChunkMetadata;
  /** Embedding vector; optional, populated after embedding. */
  embedding?: Array<number>;
};
/**
 * The chunking strategy types that are available.
 */
export type ChunkingStrategy =
  | "character"
  | "recursive"
  | "sentence"
  | "token"
  | "markdown"
  | "html"
  | "json"
  | "latex"
  | "semantic"
  | "semantic-markdown";
/**
 * Result of validating a chunker configuration.
 */
export type ChunkerValidationResult = {
  valid: boolean;
  errors: Array<string>;
  warnings: Array<string>;
};
/**
 * Configuration shared by all chunkers.
 */
export type BaseChunkerConfig = {
  /** Maximum size of a chunk (how it is interpreted varies by strategy). */
  maxSize?: number;
  /** Minimum size of a chunk. */
  minSize?: number;
  /** Overlap between consecutive chunks. */
  overlap?: number;
  /** Whether whitespace is trimmed from chunks. */
  trimWhitespace?: boolean;
  /** Custom metadata added to every chunk. */
  metadata?: Record<string, unknown>;
  /** Whether metadata from the source document is preserved. */
  preserveMetadata?: boolean;
};
/**
 * Configuration for the character chunker
 * (simple character-based splitting).
 */
export type CharacterChunkerConfig = BaseChunkerConfig & {
  /** Separator character (default: ""). */
  separator?: string;
  /** Whether the separator is kept in chunks. */
  keepSeparator?: boolean;
};
/**
 * Configuration for the recursive chunker
 * (smart splitting based on content structure).
 */
export type RecursiveChunkerConfig = BaseChunkerConfig & {
  /** Separators to try, in order (default: ["\n\n", "\n", " ", ""]). */
  separators?: Array<string>;
  /** Whether the separators are regex patterns. */
  isSeparatorRegex?: boolean;
  /** Whether separators are kept in the output chunks. */
  keepSeparators?: boolean;
};
/**
 * Configuration for the sentence chunker
 * (sentence-aware splitting).
 */
export type SentenceChunkerConfig = BaseChunkerConfig & {
  /** Characters that end a sentence (default: [".", "!", "?", "\n"]). */
  sentenceEnders?: Array<string>;
  /** Minimum number of sentences per chunk. */
  minSentences?: number;
  /** Maximum number of sentences per chunk. */
  maxSentences?: number;
};
/**
 * Configuration for the token chunker
 * (token-aware splitting using a tokenizer).
 */
export type TokenChunkerConfig = BaseChunkerConfig & {
  /** Which tokenizer to use (default: "cl100k_base" for GPT models). */
  tokenizer?: string;
  /** Model name used for token counting (an alternative to tokenizer). */
  modelName?: string;
  /** Maximum number of tokens per chunk. */
  maxTokens?: number;
  /** Overlap between chunks, in tokens. */
  tokenOverlap?: number;
};
/**
 * Configuration for the markdown chunker
 * (structure-aware markdown splitting).
 */
export type MarkdownChunkerConfig = BaseChunkerConfig & {
  /** Header levels on which to split (default: [1, 2, 3]). */
  headerLevels?: Array<number>;
  /** Whether code blocks are included as single chunks. */
  preserveCodeBlocks?: boolean;
  /** Whether the header is included in the chunk content. */
  includeHeader?: boolean;
  /** Whether markdown formatting is stripped from the output. */
  stripFormatting?: boolean;
};
/**
 * Configuration for the HTML chunker
 * (HTML structure-aware splitting).
 */
export type HTMLChunkerConfig = BaseChunkerConfig & {
  /** Tags on which to split (default: ["div", "p", "section", "article"]). */
  splitTags?: Array<string>;
  /** Tags that are preserved as single chunks. */
  preserveTags?: Array<string>;
  /** Whether only text is extracted (HTML tags stripped). */
  extractTextOnly?: boolean;
  /** Whether tag metadata is included in chunks. */
  includeTagMetadata?: boolean;
};
/**
 * Configuration for the JSON chunker
 * (JSON structure-aware splitting).
 */
export type JSONChunkerConfig = BaseChunkerConfig & {
  /** Maximum depth that is traversed. */
  maxDepth?: number;
  /** Keys on which to split (arrays/objects at these keys become chunks). */
  splitKeys?: Array<string>;
  /** Keys that are preserved as single units. */
  preserveKeys?: Array<string>;
  /** Whether the JSON path is included in metadata. */
  includeJsonPath?: boolean;
};
/**
 * Configuration for the LaTeX chunker
 * (LaTeX structure-aware splitting).
 */
export type LaTeXChunkerConfig = BaseChunkerConfig & {
  /** Environments on which to split (default: ["section", "subsection", "chapter"]). */
  splitEnvironments?: Array<string>;
  /** Whether math environments are preserved as single chunks. */
  preserveMath?: boolean;
  /** Whether the preamble is included as a separate chunk. */
  includePreamble?: boolean;
};
/**
 * Configuration for the semantic chunker
 * (LLM-based semantic splitting).
 */
export type SemanticChunkerConfig = BaseChunkerConfig & {
  /** Minimum number of tokens before a split is considered. */
  joinThreshold?: number;
  /** Model used for semantic analysis. */
  modelName?: string;
  /** Provider of the model. */
  provider?: string;
  /** Custom prompt used for semantic grouping. */
  semanticPrompt?: string;
  /** Maximum header depth considered for grouping. */
  maxHeaderDepth?: number;
  /** Similarity threshold used for grouping (0-1). */
  similarityThreshold?: number;
};
/**
 * Union of every chunker configuration type.
 */
export type ChunkerConfig =
  | CharacterChunkerConfig
  | RecursiveChunkerConfig
  | SentenceChunkerConfig
  | TokenChunkerConfig
  | MarkdownChunkerConfig
  | HTMLChunkerConfig
  | JSONChunkerConfig
  | LaTeXChunkerConfig
  | SemanticChunkerConfig;
/*
 * NOTE: the `Chunker` type (implemented by all chunking strategies) is
 * declared earlier in this file; no declaration follows this comment.
 */
/**
 * Descriptive metadata about a chunker, used for factory registration.
 */
export type ChunkerMetadata = {
  /** Description meant for human readers. */
  description: string;
  /** Document types that are supported. */
  supportedTypes?: Array<DocumentType>;
  /** True when the chunker needs external dependencies. */
  requiresExternalDeps?: boolean;
  /** The default configuration (may be any chunker-specific config). */
  defaultConfig?: Record<string, unknown>;
  /** Configuration options that are supported. */
  supportedOptions?: Array<string>;
  /** Use cases in which this chunker excels. */
  useCases?: Array<string>;
  /** Other names/aliases this chunker is known by. */
  aliases?: Array<string>;
};
/**
 * Types of metadata extraction.
 */
export type ExtractorType =
  | "title"
  | "summary"
  | "keywords"
  | "questions"
  | "custom";
/**
 * Configuration shared by metadata extractors.
 */
export type BaseExtractorConfig = {
  /** Language model used for extraction. */
  modelName?: string;
  /** Provider of the model. */
  provider?: string;
  /** Custom template for the prompt. */
  promptTemplate?: string;
  /** Token limit for the LLM response. */
  maxTokens?: number;
  /** Temperature used for LLM generation. */
  temperature?: number;
};
/**
 * Configuration for the title extractor.
 */
export type TitleExtractorConfig = BaseExtractorConfig & {
  /** How many nodes are used for title extraction. */
  nodes?: number;
  /** Template used to process individual nodes. */
  nodeTemplate?: string;
  /** Template used to combine node results. */
  combineTemplate?: string;
};
/**
 * Configuration for the summary extractor.
 */
export type SummaryExtractorConfig = BaseExtractorConfig & {
  /** Which summary types are generated. */
  summaryTypes?: Array<"current" | "previous" | "next">;
  /** Maximum length of the summary, in words. */
  maxWords?: number;
};
/**
 * Configuration for the keyword extractor.
 */
export type KeywordExtractorConfig = BaseExtractorConfig & {
  /** Upper bound on the number of keywords extracted. */
  maxKeywords?: number;
  /** Minimum relevance score of a keyword (0-1). */
  minRelevance?: number;
};
/**
 * Configuration for the question-answer extractor.
 */
export type QuestionExtractorConfig = BaseExtractorConfig & {
  /** How many Q&A pairs are generated. */
  numQuestions?: number;
  /** Whether answers are included in the output. */
  includeAnswers?: boolean;
  /** Whether embedding-only questions are generated (shorter, more focused). */
  embeddingOnly?: boolean;
};
/**
 * Configuration for the custom schema extractor.
 */
export type CustomSchemaExtractorConfig = BaseExtractorConfig & {
  /** Zod schema used for structured extraction (typed as `unknown` here). */
  schema: unknown;
  /** Describes what should be extracted. */
  description?: string;
};
/**
 * Combined parameters for extraction.
 */
export type ExtractParams = {
  /** Document title extraction: a flag or a configuration. */
  title?: boolean | TitleExtractorConfig;
  /** Document summary extraction: a flag or a configuration. */
  summary?: boolean | SummaryExtractorConfig;
  /** Keyword extraction: a flag or a configuration. */
  keywords?: boolean | KeywordExtractorConfig;
  /** Q&A pair generation: a flag or a configuration. */
  questions?: boolean | QuestionExtractorConfig;
  /** Extraction driven by a custom schema. */
  custom?: CustomSchemaExtractorConfig;
};
/**
 * Result of extraction for one chunk.
 */
export type ExtractionResult = {
  /** The extracted title. */
  title?: string;
  /** The extracted summary. */
  summary?: string;
  /** The extracted keywords. */
  keywords?: Array<string>;
  /** The generated Q&A pairs. */
  questions?: {
    question: string;
    answer?: string;
  }[];
  /** Result of the custom schema extraction. */
  custom?: Record<string, unknown>;
};
/**
 * Context of a request, used for dynamic configuration.
 */
export type RequestContext = {
  userId?: string;
  tenantId?: string;
  environment?: string;
  custom?: Record<string, unknown>;
};
/**
 * Filter over metadata, written in MongoDB/Sift query syntax.
 */
export type MetadataFilter = {
  // Equality operators
  $eq?: unknown;
  $ne?: unknown;

  // Numeric comparison operators
  $gt?: number;
  $gte?: number;
  $lt?: number;
  $lte?: number;

  // Membership operators
  $in?: Array<unknown>;
  $nin?: Array<unknown>;

  // Operators combining nested filters
  $and?: Array<MetadataFilter>;
  $or?: Array<MetadataFilter>;
  $not?: MetadataFilter;
  $nor?: Array<MetadataFilter>;

  // Remaining operators
  $exists?: boolean;
  $contains?: string;
  $regex?: string;
  $size?: number;

  // Any other key is accepted with an unknown value
  [field: string]: unknown;
};
/**
 * One result returned by a vector store query.
 */
export type VectorQueryResult = {
  /** The unique identifier. */
  id: string;
  /** The text content. */
  text?: string;
  /** The similarity/relevance score. */
  score?: number;
  /** Metadata associated with the result. */
  metadata?: Record<string, unknown>;
  /** The embedding vector (when requested). */
  vector?: Array<number>;
};
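/*
 * NOTE: the reranker configuration type (`RerankerConfig`) is declared
 * earlier in this file; no declaration follows this comment.
 */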
/**
 * Query options that are specific to a provider.
 */
export type VectorProviderOptions = {
  /** Options for Pinecone. */
  pinecone?: {
    namespace?: string;
    sparseVector?: Array<number>;
  };
  /** Options for pgVector. */
  pgVector?: {
    minScore?: number;
    ef?: number;
    probes?: number;
  };
  /** Options for Chroma. */
  chroma?: {
    where?: Record<string, unknown>;
    whereDocument?: Record<string, unknown>;
  };
};
/**
 * Configuration for the vector query tool.
 */
export type VectorQueryToolConfig = {
  /** Identifier of the tool. */
  id?: string;
  /** Description of the tool, for AI agents. */
  description?: string;
  /** Name of the index inside the vector store. */
  indexName: string;
  /** Specification of the embedding model. */
  embeddingModel: {
    provider: string;
    modelName: string;
  };
  /** Turns on metadata filtering. */
  enableFilter?: boolean;
  /** Whether embedding vectors are included in results. */
  includeVectors?: boolean;
  /** Whether full source objects are included in results. */
  includeSources?: boolean;
  /** How many results to return. */
  topK?: number;
  /** Configuration of the reranker. */
  reranker?: RerankerConfig;
  /** Options specific to the provider. */
  providerOptions?: VectorProviderOptions;
};
/**
 * Wrapper around the results of a vector query.
 */
export type VectorQueryResponse = {
  /** Relevant context, formatted as a string. */
  relevantContext: string;
  /** The source query results. */
  sources: Array<VectorQueryResult>;
  /** Total number of results found. */
  totalResults: number;
  /** Metadata about the query. */
  metadata: {
    queryTime: number;
    reranked: boolean;
    filtered: boolean;
  };
};
/**
 * One result of a BM25 search.
 */
export type BM25Result = {
  /** ID of the document. */
  id: string;
  /** The BM25 score. */
  score: number;
  /** Text of the document. */
  text: string;
  /** Metadata associated with the document. */
  metadata?: Record<string, unknown>;
};
/**
 * Configuration for hybrid search.
 */
export type HybridSearchConfig = {
  /** Weight given to vector search (0-1). */
  vectorWeight?: number;
  /** Weight given to BM25 search (0-1). */
  bm25Weight?: number;
  /** Method used for fusion. */
  fusionMethod?:
    | "rrf"
    | "linear";
  /** The k parameter of RRF. */
  rrfK?: number;
  /** How many results to return. */
  topK?: number;
  /** Turns on reranking. */
  enableReranking?: boolean;
  /** Configuration of the reranker. */
  reranker?: RerankerConfig;
};
/**
 * One result of a hybrid search.
 */
export type HybridSearchResult = {
  /** ID of the document. */
  id: string;
  /** The combined score. */
  score: number;
  /** Text of the document. */
  text: string;
  /** Metadata associated with the document. */
  metadata?: Record<string, unknown>;
  /** Breakdown of the score. */
  scores?: {
    vector?: number;
    bm25?: number;
    combined?: number;
    reranked?: number;
  };
};
/**
 * A graph node, representing a document chunk.
 */
export type GraphNode = {
  /** Identifier that is unique to the node. */
  id: string;
  /** Text content held by the node. */
  content: string;
  /** Metadata of the node. */
  metadata: Record<string, unknown>;
  /** The embedding vector. */
  embedding?: Array<number>;
};
/**
 * A graph edge, representing a semantic relationship.
 */
export type GraphEdge = {
  /** ID of the source node. */
  source: string;
  /** ID of the target node. */
  target: string;
  /** Weight of the edge (similarity score). */
  weight: number;
  /** Type of the edge. */
  type?: string;
};
/**
 * A chunk supplied as input for graph creation.
 */
export type GraphChunk = {
  /** Text content of the chunk. */
  text: string;
  /** Metadata of the chunk. */
  metadata?: Record<string, unknown>;
};
/**
 * An embedding supplied as input for graph creation.
 */
export type GraphEmbedding = {
  /** The embedding vector. */
  vector: Array<number>;
};
/**
 * A ranked node returned from a graph query.
 */
export type RankedNode = {
  /** ID of the node. */
  id: string;
  /** Content of the node. */
  content: string;
  /** Metadata of the node. */
  metadata: Record<string, unknown>;
  /** The relevance score. */
  score: number;
};
/**
 * Configuration for Graph RAG.
 */
export type GraphRAGConfig = {
  /** Dimension of the embedding vector (default: 1536). */
  dimension?: number;
  /** Similarity threshold above which an edge is created (default: 0.7). */
  threshold?: number;
};
/**
 * Parameters of a graph query.
 */
export type GraphQueryParams = {
  /** Embedding vector of the query. */
  query: Array<number>;
  /** How many results to return (default: 10). */
  topK?: number;
  /** Number of random walk steps (default: 100). */
  randomWalkSteps?: number;
  /** Restart probability of the random walk (default: 0.15). */
  restartProb?: number;
};
/**
 * Statistics describing a graph.
 */
export type GraphStats = {
  nodeCount: number;
  edgeCount: number;

  avgDegree: number;
  threshold: number;
};
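/*
 * NOTE: the reranker type options (`RerankerType`) are declared earlier in
 * this file; no declaration follows this comment.
 */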
/**
 * Options accepted by a reranker.
 */
export type RerankerOptions = {
  /** Query embedding that has been computed in advance. */
  queryEmbedding?: Array<number>;
  /** How many results are returned after reranking. */
  topK?: number;
  /** Weights applied when scoring (must sum to 1.0). */
  weights?: {
    semantic?: number;
    vector?: number;
    position?: number;
  };
};
/**
 * A reranked result, with detailed scoring.
 */
export type RerankResult = {
  /** The original query result. */
  result: VectorQueryResult;
  /** The combined reranking score (0-1). */
  score: number;
  /** Detailed breakdown of the score. */
  details: {
    semantic: number;
    vector: number;
    position: number;
    queryAnalysis?: string;
  };
};
/**
 * Configuration for MDocument.
 */
export type MDocumentConfig = {
  /** Type of the document. */
  type: DocumentType;
  /** Custom metadata. */
  metadata?: Record<string, unknown>;
};
/**
 * Chunking parameters for MDocument.
 */
export type ChunkParams = {
  /** Which chunking strategy to use. */
  strategy?: ChunkingStrategy;
  /** Configuration specific to the strategy. */
  config?: ChunkerConfig;
  /** Options for metadata extraction. */
  extract?: ExtractParams;
};
/**
 * Arguments of the RAG CLI command.
 */
export type RAGCommandArgs = {
  /** Path of the input file. */
  file?: string;
  /** The query string. */
  query?: string;
  /** The chunking strategy. */
  strategy?: ChunkingStrategy;
  /** Maximum size of a chunk. */
  maxSize?: number;
  /** Overlap between chunks. */
  overlap?: number;
  /** Format of the output. */
  format?:
    | "json"
    | "text"
    | "table";
  /** Turns on verbose output. */
  verbose?: boolean;
  /** Provider used for embeddings. */
  provider?: string;
  /** Model used for embeddings. */
  model?: string;
  /** How many results to return. */
  topK?: number;
  /** Name of the index. */
  index?: string;
  /** Turns on hybrid search. */
  hybrid?: boolean;
  /** Whether Graph RAG is used. */
  graph?: boolean;
};
/** Options taken by the recursive JSON chunk extractor. */
export type ExtractChunksOptions = {
  data: unknown;
  path: string;

  depth: number;
  maxDepth: number;
  maxSize: number;

  splitKeys: Array<string>;
  preserveKeys: Array<string>;
  includeJsonPath: boolean;
};
/** State of document processing, as held by MDocument. */
export type DocumentState = {
  content: string;
  type: DocumentType;
  metadata: Record<string, unknown>;

  chunks: Array<Chunk>;
  embeddings: Array<Array<number>>;
  history: Array<string>;
};
/** The canonical set of RAG error codes. */
export type RAGErrorCode =
  // Chunking
  | "RAG_CHUNKING_ERROR"
  | "RAG_CHUNKING_INVALID_CONFIG"
  | "RAG_CHUNKING_STRATEGY_NOT_FOUND"
  | "RAG_CHUNKING_EMPTY_CONTENT"
  | "RAG_CHUNKING_SIZE_EXCEEDED"
  // Metadata extraction
  | "RAG_METADATA_EXTRACTION_ERROR"
  | "RAG_METADATA_EXTRACTION_TIMEOUT"
  | "RAG_METADATA_SCHEMA_INVALID"
  | "RAG_METADATA_EXTRACTOR_NOT_FOUND"
  // Embedding
  | "RAG_EMBEDDING_ERROR"
  | "RAG_EMBEDDING_DIMENSION_MISMATCH"
  | "RAG_EMBEDDING_RATE_LIMIT"
  | "RAG_EMBEDDING_PROVIDER_ERROR"
  // Vector query / store
  | "RAG_VECTOR_QUERY_ERROR"
  | "RAG_VECTOR_QUERY_TIMEOUT"
  | "RAG_VECTOR_STORE_UNAVAILABLE"
  | "RAG_VECTOR_STORE_CONNECTION_ERROR"
  | "RAG_VECTOR_INDEX_NOT_FOUND"
  // Reranker
  | "RAG_RERANKER_ERROR"
  | "RAG_RERANKER_NOT_FOUND"
  | "RAG_RERANKER_API_ERROR"
  // Graph
  | "RAG_GRAPH_ERROR"
  | "RAG_GRAPH_TRAVERSAL_ERROR"
  | "RAG_GRAPH_NODE_NOT_FOUND"
  // Pipeline
  | "RAG_PIPELINE_ERROR"
  | "RAG_PIPELINE_STAGE_FAILED"
  | "RAG_PIPELINE_PARTIAL_FAILURE"
  // Circuit breaker
  | "RAG_CIRCUIT_BREAKER_OPEN"
  | "RAG_CIRCUIT_BREAKER_HALF_OPEN_LIMIT"
  // Remaining codes
  | "RAG_OPERATION_TIMEOUT"
  | "RAG_RETRY_EXHAUSTED"
  | "RAG_INVALID_CONFIGURATION";