UNPKG

@langchain/core

Version:
429 lines (428 loc) 19.7 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.SaveableVectorStore = exports.VectorStore = exports.VectorStoreRetriever = void 0;
const index_js_1 = require("./retrievers/index.cjs");
const serializable_js_1 = require("./load/serializable.cjs");
/**
 * Class for retrieving documents from a `VectorStore` based on vector similarity
 * or maximal marginal relevance (MMR).
 *
 * `VectorStoreRetriever` extends `BaseRetriever`, implementing methods for
 * adding documents to the underlying vector store and performing document
 * retrieval with optional configurations.
 *
 * @class VectorStoreRetriever
 * @extends BaseRetriever
 * @implements VectorStoreRetrieverInterface
 * @template V - Type of vector store implementing `VectorStoreInterface`.
 */
class VectorStoreRetriever extends index_js_1.BaseRetriever {
    /**
     * Name used to identify this class.
     *
     * @returns {string} Always `"VectorStoreRetriever"`.
     */
    static lc_name() {
        return "VectorStoreRetriever";
    }
    /**
     * Namespace path of this retriever.
     *
     * @returns {string[]} Always `["langchain_core", "vectorstores"]`.
     */
    get lc_namespace() {
        return ["langchain_core", "vectorstores"];
    }
    /**
     * Returns the type of vector store, as defined by the `vectorStore` instance.
     *
     * @returns {string} The vector store type.
     */
    _vectorstoreType() {
        return this.vectorStore._vectorstoreType();
    }
    /**
     * Initializes a new instance of `VectorStoreRetriever` with the specified configuration.
     *
     * This constructor configures the retriever to interact with a given `VectorStore`
     * and supports different retrieval strategies, including similarity search and maximal
     * marginal relevance (MMR) search. Various options allow customization of the number
     * of documents retrieved per query, filtering based on conditions, and fine-tuning
     * MMR-specific parameters.
     *
     * @param fields - Configuration options for setting up the retriever:
     *
     *   - `vectorStore` (required): The `VectorStore` instance implementing `VectorStoreInterface`
     *     that will be used to store and retrieve document embeddings. This is the core component
     *     of the retriever, enabling vector-based similarity and MMR searches.
     *
     *   - `k` (optional): Specifies the number of documents to retrieve per search query. If not
     *     provided (`null`/`undefined`), defaults to 4.
     *
     *   - `searchType` (optional): Defines the search approach used by the retriever, allowing for
     *     flexibility between two methods:
     *       - `"similarity"` (default): A similarity-based search, retrieving documents with high vector
     *         similarity to the query. This type prioritizes relevance and is often used when diversity
     *         among results is less critical.
     *       - `"mmr"`: Maximal Marginal Relevance search, which combines relevance with diversity. MMR
     *         is useful for scenarios where varied content is essential, as it selects results that
     *         both match the query and introduce content diversity.
     *
     *   - `filter` (optional): A filter of type `FilterType`, defined by the vector store, that allows
     *     for refined and targeted search results. It is passed through unchanged to the vector
     *     store's search method.
     *
     *   - `searchKwargs` (optional, applicable only if `searchType` is `"mmr"`): Additional settings
     *     for configuring MMR-specific behavior. It is ignored for any other `searchType`.
     *       - `fetchK`: The initial number of documents fetched from the vector store before the MMR
     *         algorithm is applied. Fetching a larger set enables the algorithm to select a more
     *         diverse subset of documents.
     *       - `lambda`: A parameter controlling the relevance-diversity balance, where 0 emphasizes
     *         diversity and 1 prioritizes relevance. Intermediate values provide a blend of the two,
     *         allowing customization based on the importance of content variety relative to query relevance.
     */
    constructor(fields) {
        super(fields);
        /**
         * The instance of `VectorStore` used for storing and retrieving document embeddings.
         * This vector store must implement the `VectorStoreInterface` to be compatible
         * with the retriever’s operations.
         */
        Object.defineProperty(this, "vectorStore", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        /**
         * Specifies the number of documents to retrieve for each search query.
         * Defaults to 4 if not specified, providing a basic result count for similarity or MMR searches.
         */
        Object.defineProperty(this, "k", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 4
        });
        /**
         * Determines the type of search operation to perform on the vector store.
         *
         * - `"similarity"` (default): Conducts a similarity search based purely on vector similarity
         *   to the query.
         * - `"mmr"`: Executes a maximal marginal relevance (MMR) search, balancing relevance and
         *   diversity in the retrieved results.
         */
        Object.defineProperty(this, "searchType", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: "similarity"
        });
        /**
         * Additional options specific to maximal marginal relevance (MMR) search, applicable
         * only if `searchType` is set to `"mmr"`.
         *
         * Includes:
         * - `fetchK`: The initial number of documents fetched before applying the MMR algorithm,
         *   allowing for a larger selection from which to choose the most diverse results.
         * - `lambda`: A parameter between 0 and 1 to adjust the relevance-diversity balance,
         *   where 0 prioritizes diversity and 1 prioritizes relevance.
         */
        Object.defineProperty(this, "searchKwargs", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        /**
         * Optional filter applied to search results, defined by the `FilterType` of the vector store.
         * Allows for refined, targeted results by restricting the returned documents based
         * on specified filter criteria.
         */
        Object.defineProperty(this, "filter", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.vectorStore = fields.vectorStore;
        // `??` keeps the defaults above only when the caller passed null/undefined.
        this.k = fields.k ?? this.k;
        this.searchType = fields.searchType ?? this.searchType;
        this.filter = fields.filter;
        // MMR options are only retained for MMR retrievers; otherwise they stay undefined.
        if (fields.searchType === "mmr") {
            this.searchKwargs = fields.searchKwargs;
        }
    }
    /**
     * Retrieves relevant documents based on the specified query, using either
     * similarity or maximal marginal relevance (MMR) search.
     *
     * If `searchType` is set to `"mmr"`, performs an MMR search to balance
     * similarity and diversity among results. For any other `searchType`,
     * delegates to the vector store's `similaritySearch`.
     *
     * @param query - The query string used to find relevant documents.
     * @param runManager - Optional callback manager for tracking retrieval progress.
     *   When present, its `"vectorstore"` child is forwarded to the vector store.
     * @returns A promise that resolves to an array of `DocumentInterface` instances
     *          representing the most relevant documents to the query.
     * @throws {Error} Throws an error if MMR search is requested but not supported
     *                 by the vector store.
     * @protected
     */
    async _getRelevantDocuments(query, runManager) {
        if (this.searchType === "mmr") {
            // MMR is optional on vector stores, so check for it before calling.
            if (typeof this.vectorStore.maxMarginalRelevanceSearch !== "function") {
                throw new Error(`The vector store backing this retriever, ${this._vectorstoreType()} does not support max marginal relevance search.`);
            }
            return this.vectorStore.maxMarginalRelevanceSearch(query, {
                k: this.k,
                filter: this.filter,
                // Spread last: keys present in `searchKwargs` take precedence over `k`/`filter`.
                ...this.searchKwargs,
            }, runManager?.getChild("vectorstore"));
        }
        return this.vectorStore.similaritySearch(query, this.k, this.filter, runManager?.getChild("vectorstore"));
    }
    /**
     * Adds an array of documents to the vector store, embedding them as part of
     * the storage process.
     *
     * This method delegates document embedding and storage to the `addDocuments`
     * method of the underlying vector store.
     *
     * @param documents - An array of documents to embed and add to the vector store.
     * @param options - Optional settings to customize document addition.
     * @returns A promise that resolves to an array of document IDs or `void`,
     *          depending on the vector store's implementation.
     */
    async addDocuments(documents, options) {
        return this.vectorStore.addDocuments(documents, options);
    }
}
exports.VectorStoreRetriever = VectorStoreRetriever;
/**
 * Abstract class representing a vector storage system for performing
 * similarity searches on embedded documents.
 *
 * `VectorStore` provides methods for adding precomputed vectors or documents,
 * removing documents based on criteria, and performing similarity searches
 * with optional scoring. Subclasses are responsible for implementing specific
 * storage mechanisms and the exact behavior of certain abstract methods
 * (this base class calls `_vectorstoreType()` and
 * `similaritySearchVectorWithScore()` without defining them).
 *
 * @abstract
 * @extends Serializable
 * @implements VectorStoreInterface
 */
class VectorStore extends serializable_js_1.Serializable {
    /**
     * Initializes a new vector store with embeddings and database configuration.
     *
     * @param embeddings - Instance of `EmbeddingsInterface` used to embed queries.
     * @param dbConfig - Configuration settings for the database or storage system.
     *   Forwarded to the `Serializable` constructor.
     */
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    constructor(embeddings, dbConfig) {
        super(dbConfig);
        /**
         * Namespace within LangChain to uniquely identify this vector store's
         * location, based on the vector store type.
         *
         * @internal
         */
        // Only ever instantiated in main LangChain
        Object.defineProperty(this, "lc_namespace", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: ["langchain", "vectorstores", this._vectorstoreType()]
        });
        /**
         * Embeddings interface for generating vector embeddings from text queries,
         * enabling vector-based similarity searches.
         */
        Object.defineProperty(this, "embeddings", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.embeddings = embeddings;
    }
    /**
     * Deletes documents from the vector store based on the specified parameters.
     *
     * The base implementation always rejects with `Error("Not implemented.")`;
     * subclasses that support deletion override it.
     *
     * @param _params - Flexible key-value pairs defining conditions for document deletion.
     * @returns A promise that resolves once the deletion is complete.
     * @throws {Error} Always, in this base implementation.
     */
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    async delete(_params) {
        throw new Error("Not implemented.");
    }
    /**
     * Searches for documents similar to a text query by embedding the query and
     * performing a similarity search on the resulting vector.
     *
     * @param query - Text query for finding similar documents.
     * @param k - Number of similar results to return. Defaults to 4.
     * @param filter - Optional filter based on `FilterType`.
     * @param _callbacks - Optional callbacks. Accepted for interface compatibility
     *   but currently unused by this base implementation.
     * @returns A promise resolving to an array of `DocumentInterface` instances representing similar documents.
     */
    async similaritySearch(query, k = 4, filter = undefined, _callbacks = undefined // implement passing to embedQuery later
    ) {
        const results = await this.similaritySearchVectorWithScore(await this.embeddings.embedQuery(query), k, filter);
        // Each result is a [document, score] tuple; keep only the document.
        return results.map((result) => result[0]);
    }
    /**
     * Searches for documents similar to a text query by embedding the query,
     * and returns results with similarity scores.
     *
     * @param query - Text query for finding similar documents.
     * @param k - Number of similar results to return. Defaults to 4.
     * @param filter - Optional filter based on `FilterType`.
     * @param _callbacks - Optional callbacks. Accepted for interface compatibility
     *   but currently unused by this base implementation.
     * @returns A promise resolving to an array of tuples, each containing a
     *          document and its similarity score.
     */
    async similaritySearchWithScore(query, k = 4, filter = undefined, _callbacks = undefined // implement passing to embedQuery later
    ) {
        return this.similaritySearchVectorWithScore(await this.embeddings.embedQuery(query), k, filter);
    }
    /**
     * Creates a `VectorStore` instance from an array of text strings and optional
     * metadata, using the specified embeddings and database configuration.
     *
     * Subclasses must implement this method to define how text and metadata
     * are embedded and stored in the vector store. The base implementation
     * throws synchronously (it is not `async`), so no promise is returned.
     *
     * @param _texts - Array of strings representing the text documents to be stored.
     * @param _metadatas - Metadata for the texts, either as an array (one for each text)
     *                     or a single object (applied to all texts).
     * @param _embeddings - Instance of `EmbeddingsInterface` to embed the texts.
     * @param _dbConfig - Database configuration settings.
     * @returns A promise that resolves to a new `VectorStore` instance (in subclass overrides).
     * @throws {Error} Throws an error if this method is not overridden by a subclass.
     */
    static fromTexts(_texts, _metadatas, _embeddings, 
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    _dbConfig) {
        throw new Error("the Langchain vectorstore implementation you are using forgot to override this, please report a bug");
    }
    /**
     * Creates a `VectorStore` instance from an array of documents, using the specified
     * embeddings and database configuration.
     *
     * Subclasses must implement this method to define how documents are embedded
     * and stored. The base implementation throws synchronously (it is not
     * `async`), so no promise is returned.
     *
     * @param _docs - Array of `DocumentInterface` instances representing the documents to be stored.
     * @param _embeddings - Instance of `EmbeddingsInterface` to embed the documents.
     * @param _dbConfig - Database configuration settings.
     * @returns A promise that resolves to a new `VectorStore` instance (in subclass overrides).
     * @throws {Error} Throws an error if this method is not overridden by a subclass.
     */
    static fromDocuments(_docs, _embeddings, 
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    _dbConfig) {
        throw new Error("the Langchain vectorstore implementation you are using forgot to override this, please report a bug");
    }
    /**
     * Creates a `VectorStoreRetriever` instance with flexible configuration options.
     *
     * The positional parameters after `kOrFields` are only used when `kOrFields`
     * is a number; when it is an object (or omitted), every option is read from
     * that object instead.
     *
     * @param kOrFields
     *    - If a number is provided, it sets the `k` parameter (number of items to retrieve).
     *    - If an object is provided, it should contain various configuration options.
     *      Its `searchKwargs` are forwarded only when its `searchType` is `"mmr"`.
     * @param filter
     *    - Optional filter criteria to limit the items retrieved based on the specified filter type.
     * @param callbacks
     *    - Optional callbacks that may be triggered at specific stages of the retrieval process.
     * @param tags
     *    - Tags to categorize or label the `VectorStoreRetriever`. Treated as an empty array if not
     *      provided; the vector store type is always appended as an extra tag.
     * @param metadata
     *    - Additional metadata as key-value pairs to add contextual information for the retrieval process.
     * @param verbose
     *    - If `true`, enables detailed logging for the retrieval process.
     *
     * @returns
     *    - A configured `VectorStoreRetriever` instance based on the provided parameters.
     *
     * @example
     * Basic usage with a `k` value:
     * ```typescript
     * const retriever = myVectorStore.asRetriever(5);
     * ```
     *
     * Usage with a configuration object:
     * ```typescript
     * const retriever = myVectorStore.asRetriever({
     *   k: 10,
     *   filter: myFilter,
     *   tags: ['example', 'test'],
     *   verbose: true,
     *   searchType: 'mmr',
     *   searchKwargs: { fetchK: 20, lambda: 0.5 },
     * });
     * ```
     */
    asRetriever(kOrFields, filter, callbacks, tags, metadata, verbose) {
        if (typeof kOrFields === "number") {
            return new VectorStoreRetriever({
                vectorStore: this,
                k: kOrFields,
                filter,
                tags: [...(tags ?? []), this._vectorstoreType()],
                metadata,
                verbose,
                callbacks,
            });
        }
        else {
            // Optional chaining throughout: `kOrFields` may be undefined here.
            const params = {
                vectorStore: this,
                k: kOrFields?.k,
                filter: kOrFields?.filter,
                tags: [...(kOrFields?.tags ?? []), this._vectorstoreType()],
                metadata: kOrFields?.metadata,
                verbose: kOrFields?.verbose,
                callbacks: kOrFields?.callbacks,
                searchType: kOrFields?.searchType,
            };
            if (kOrFields?.searchType === "mmr") {
                return new VectorStoreRetriever({
                    ...params,
                    searchKwargs: kOrFields.searchKwargs,
                });
            }
            return new VectorStoreRetriever({ ...params });
        }
    }
}
exports.VectorStore = VectorStore;
/**
 * Abstract class extending `VectorStore` that defines a contract for saving
 * and loading vector store instances.
 *
 * The `SaveableVectorStore` class allows vector store implementations to
 * persist their data and retrieve it when needed. The format for saving and
 * loading data is left to the implementing subclass.
 *
 * Subclasses must implement the `save` method to handle their custom
 * serialization logic, while the `load` method enables reconstruction of a
 * vector store from saved data, requiring compatible embeddings through the
 * `EmbeddingsInterface`.
 *
 * @abstract
 * @extends VectorStore
 */
class SaveableVectorStore extends VectorStore {
    /**
     * Loads a vector store instance from the specified directory, using the
     * provided embeddings to ensure compatibility.
     *
     * This static method reconstructs a `SaveableVectorStore` from previously
     * saved data. Implementations should interpret the saved data format to
     * recreate the vector store instance. The base implementation throws
     * synchronously (it is not `async`), so no promise is returned.
     *
     * @param _directory - The directory path from which the vector store
     * data will be loaded.
     * @param _embeddings - An instance of `EmbeddingsInterface` to align
     * the embeddings with the loaded vector data.
     * @returns A promise that resolves to a `SaveableVectorStore` instance
     * constructed from the saved data (in subclass overrides).
     * @throws {Error} Always, in this base implementation.
     */
    static load(_directory, _embeddings) {
        throw new Error("Not implemented");
    }
}
exports.SaveableVectorStore = SaveableVectorStore;