UNPKG

@mastra/rag

Version:

The Retrieval-Augmented Generation (RAG) module contains document processing and embedding utilities.

267 lines 9.56 kB
import type { MastraUnion } from '@mastra/core/action';
import type { RequestContext } from '@mastra/core/request-context';
import type { MastraVector, MastraEmbeddingModel, MastraEmbeddingOptions } from '@mastra/core/vector';
import type { RerankConfig } from '../rerank/index.js';

/**
 * Context handed to a dynamic vector-store resolver function.
 *
 * Supports multi-tenant deployments where the target vector store is chosen
 * per request (e.g. one schema/database per tenant).
 */
export interface VectorStoreResolverContext {
    /** Request context carrying tenant/schema information. */
    requestContext?: RequestContext;
    /** The Mastra instance, for looking up registered resources. */
    mastra?: MastraUnion;
}

/**
 * A function that resolves a vector store at execution time, synchronously or
 * asynchronously, based on the current context. Typically used for
 * multi-tenant applications where each tenant owns a separate schema/database.
 *
 * @example
 * ```typescript
 * const vectorStoreResolver: VectorStoreResolver = async ({ requestContext }) => {
 *   const schemaId = requestContext?.get('schemaId');
 *   return new PgVector({
 *     connectionString: process.env.DATABASE_URL,
 *     schemaName: `tenant_${schemaId}`,
 *   });
 * };
 * ```
 */
export type VectorStoreResolver = (context: VectorStoreResolverContext) => MastraVector | Promise<MastraVector>;

/** Pinecone-specific query options. */
export interface PineconeConfig {
    /** Pinecone namespace to query within. */
    namespace?: string;
    /** Sparse vector component for hybrid (dense + sparse) search. */
    sparseVector?: {
        indices: number[];
        values: number[];
    };
}

/** pgvector-specific query options. */
export interface PgVectorConfig {
    /** Minimum similarity score for returned results. */
    minScore?: number;
    /** HNSW search parameter (size of the dynamic candidate list). */
    ef?: number;
    /** IVFFlat search parameter (number of index lists probed). */
    probes?: number;
}

// --- Internal filter grammar for Chroma `where` / `whereDocument` clauses ---

type LiteralValue = string | number | boolean;
type ListLiteralValue = LiteralValue[];
type LiteralNumber = number;
type LogicalOperator = '$and' | '$or';
type InclusionOperator = '$in' | '$nin';
type WhereOperator = '$gt' | '$gte' | '$lt' | '$lte' | '$ne' | '$eq';
// An operator clause, e.g. `{ $gte: 10 }` or `{ $in: ['a', 'b'] }`.
type OperatorExpression = {
    [key in WhereOperator | InclusionOperator | LogicalOperator]?: LiteralValue | ListLiteralValue;
};
// Field-level conditions: either a direct literal match or an operator clause.
type BaseWhere = {
    [key: string]: LiteralValue | OperatorExpression;
};
// Logical composition of sub-filters, e.g. `{ $and: [ ... ] }`.
type LogicalWhere = {
    [key in LogicalOperator]?: Where[];
};
type Where = BaseWhere | LogicalWhere;
type WhereDocumentOperator = '$contains' | '$not_contains' | LogicalOperator;
// Document-content conditions, possibly nested under logical operators.
type WhereDocument = {
    [key in WhereDocumentOperator]?: LiteralValue | LiteralNumber | WhereDocument[];
};

/** Chroma-specific query options. */
export interface ChromaConfig {
    /** Metadata filter applied to results. */
    where?: Where;
    /** Document-content filter applied to results. */
    whereDocument?: WhereDocument;
}

/**
 * Per-database configuration, keyed by store type. Known stores get precise
 * shapes; the index signature leaves room for additional/custom stores.
 */
export type DatabaseConfig = {
    pinecone?: PineconeConfig;
    pgvector?: PgVectorConfig;
    chroma?: ChromaConfig;
    [key: string]: any;
};

/**
 * Options accepted by `createVectorQueryTool`.
 *
 * The vector store is specified through a discriminated union with two
 * mutually exclusive forms:
 *
 * 1. **By name** — `vectorStoreName` references a store registered with Mastra.
 * 2. **Direct instance** — `vectorStore` supplies a store instance, or a
 *    resolver function for dynamic (e.g. per-tenant) selection.
 *
 * @example Named store registered with Mastra
 * ```typescript
 * const tool = createVectorQueryTool({
 *   vectorStoreName: 'myVectorStore',
 *   indexName: 'documents',
 *   model: openai.embedding('text-embedding-3-small'),
 * });
 * ```
 *
 * @example Direct store instance
 * ```typescript
 * const tool = createVectorQueryTool({
 *   vectorStore: new PgVector({ connectionString: '...' }),
 *   indexName: 'documents',
 *   model: openai.embedding('text-embedding-3-small'),
 * });
 * ```
 *
 * @example Filtering and reranking enabled
 * ```typescript
 * const tool = createVectorQueryTool({
 *   vectorStoreName: 'myVectorStore',
 *   indexName: 'documents',
 *   model: openai.embedding('text-embedding-3-small'),
 *   enableFilter: true,
 *   reranker: {
 *     model: cohere.rerank('rerank-v3.5'),
 *     options: { topK: 5 },
 *   },
 * });
 * ```
 */
export type VectorQueryToolOptions = {
    /** Custom tool ID. Defaults to `VectorQuery {storeName} {indexName} Tool`. */
    id?: string;
    /** Custom tool description shown to the LLM. */
    description?: string;
    /** Name of the index to query within the vector store. */
    indexName: string;
    /** Embedding model that converts query text into vectors. */
    model: MastraEmbeddingModel<string>;
    /** When true, enables metadata filtering (adds a `filter` input to the tool schema). */
    enableFilter?: boolean;
    /** When true, vector embeddings are included in results. Defaults to false. */
    includeVectors?: boolean;
    /** When true, source documents are included in the response. Defaults to true. */
    includeSources?: boolean;
    /** Optional reranker configuration to improve result relevance. */
    reranker?: RerankConfig;
    /** Database-specific configuration options. */
    databaseConfig?: DatabaseConfig;
} & ProviderOptions & ({
    /** Name of a vector store registered with Mastra. */
    vectorStoreName: string;
} | {
    vectorStoreName?: string;
    /**
     * A vector store instance, or a resolver function for dynamic selection.
     *
     * For multi-tenant applications, pass a function that inspects the request
     * context and returns the store for the current tenant/schema.
     *
     * @example Static store
     * ```typescript
     * vectorStore: new PgVector({ connectionString: '...' })
     * ```
     *
     * @example Dynamic multi-tenant resolver
     * ```typescript
     * vectorStore: async ({ requestContext }) => {
     *   const schemaId = requestContext?.get('schemaId');
     *   return getVectorStoreForSchema(schemaId);
     * }
     * ```
     */
    vectorStore: MastraVector | VectorStoreResolver;
});

/**
 * Options accepted by `createGraphRAGTool`.
 *
 * GraphRAG augments vector similarity search with graph-based retrieval
 * (random walks over a similarity graph) for improved context relevance.
 *
 * The vector store is specified through the same discriminated union as
 * {@link VectorQueryToolOptions}: either `vectorStoreName` (a store registered
 * with Mastra) or `vectorStore` (an instance or resolver function) — never
 * both semantics at once.
 *
 * @example Named vector store
 * ```typescript
 * const tool = createGraphRAGTool({
 *   vectorStoreName: 'myVectorStore',
 *   indexName: 'documents',
 *   model: openai.embedding('text-embedding-3-small'),
 * });
 * ```
 *
 * @example Custom graph options
 * ```typescript
 * const tool = createGraphRAGTool({
 *   vectorStoreName: 'myVectorStore',
 *   indexName: 'documents',
 *   model: openai.embedding('text-embedding-3-small'),
 *   graphOptions: {
 *     randomWalkSteps: 200,
 *     restartProb: 0.2,
 *   },
 * });
 * ```
 */
export type GraphRagToolOptions = {
    /** Custom tool ID. Defaults to `GraphRAG {storeName} {indexName} Tool`. */
    id?: string;
    /** Custom tool description shown to the LLM. */
    description?: string;
    /** Name of the index to query within the vector store. */
    indexName: string;
    /** Embedding model that converts query text into vectors. */
    model: MastraEmbeddingModel<string>;
    /** When true, enables metadata filtering (adds a `filter` input to the tool schema). */
    enableFilter?: boolean;
    /** When true, source documents are included in the response. Defaults to true. */
    includeSources?: boolean;
    /** Tuning knobs for the graph-based retrieval algorithm. */
    graphOptions?: {
        /** Vector dimension size. Defaults to 1536. */
        dimension?: number;
        /** Number of steps in the random walk. Defaults to 100. */
        randomWalkSteps?: number;
        /** Probability of restarting the random walk. Defaults to 0.15. */
        restartProb?: number;
        /** Similarity threshold for graph edges. Defaults to 0.7. */
        threshold?: number;
    };
} & ProviderOptions & ({
    vectorStoreName: string;
} | {
    vectorStoreName?: string;
    /**
     * A vector store instance, or a resolver function for dynamic selection.
     *
     * For multi-tenant applications, pass a function that inspects the request
     * context and returns the store for the current tenant/schema.
     *
     * @example Static store
     * ```typescript
     * vectorStore: new PgVector({ connectionString: '...' })
     * ```
     *
     * @example Dynamic multi-tenant resolver
     * ```typescript
     * vectorStore: async ({ requestContext }) => {
     *   const schemaId = requestContext?.get('schemaId');
     *   return getVectorStoreForSchema(schemaId);
     * }
     * ```
     */
    vectorStore: MastraVector | VectorStoreResolver;
});

export type ProviderOptions = {
    /**
     * Provider-specific options for the embedding model
     * (e.g. outputDimensionality).
     *
     * ⚠️ **IMPORTANT**: `providerOptions` only works with AI SDK v2 models.
     *
     * **v1 models**: configure options when creating the model instead:
     * ✅ const model = openai.embedding('text-embedding-3-small', { dimensions: 512 });
     *
     * **v2 models**: use providerOptions:
     * ✅ providerOptions: { openai: { dimensions: 512 } }
     */
    providerOptions?: MastraEmbeddingOptions['providerOptions'];
};

/**
 * Default options for GraphRAG.
 * @default { dimension: 1536, randomWalkSteps: 100, restartProb: 0.15, threshold: 0.7 }
 */
export declare const defaultGraphOptions: {
    dimension: number;
    randomWalkSteps: number;
    restartProb: number;
    threshold: number;
};
export {};
//# sourceMappingURL=types.d.ts.map