UNPKG

@langchain/community

Version:
228 lines (227 loc) 9.91 kB
import { type Pool, type PoolClient, type PoolConfig } from "pg";
import { VectorStore } from "@langchain/core/vectorstores";
import type { EmbeddingsInterface } from "@langchain/core/embeddings";
import { Document } from "@langchain/core/documents";
/** Free-form metadata object: string keys mapped to values of unknown type. */
type Metadata = Record<string, unknown>;
/** Names of the distance strategies accepted by `PGVectorStoreArgs.distanceStrategy`. */
export type DistanceStrategy = "cosine" | "innerProduct" | "euclidean";
/**
 * Interface that defines the arguments accepted when creating a
 * `PGVectorStore` instance. It covers the Postgres connection (either
 * `pg` pool options or an existing pool), table / collection / schema
 * names, column name overrides, a metadata filter, verbosity, insert
 * chunk size, ids, and the distance strategy.
 *
 * Only `tableName` is required by this type.
 */
export interface PGVectorStoreArgs {
    /** `pg` pool configuration. Presumably used when no `pool` is supplied; confirm against the implementation. */
    postgresConnectionOptions?: PoolConfig;
    /** An existing `pg` pool instance. */
    pool?: Pool;
    /** Name of the table (required). */
    tableName: string;
    /** Name of the collection table. */
    collectionTableName?: string;
    /** Name of the collection. */
    collectionName?: string;
    /** Metadata associated with the collection; may be `null`. */
    collectionMetadata?: Metadata | null;
    /** Schema name; may be `null`. */
    schemaName?: string | null;
    /** Schema name for the extension; may be `null`. NOTE(review): presumably the schema where pgvector is installed; confirm. */
    extensionSchemaName?: string | null;
    /** Optional overrides for the names of the id, vector, content, and metadata columns. */
    columns?: {
        idColumnName?: string;
        vectorColumnName?: string;
        contentColumnName?: string;
        metadataColumnName?: string;
    };
    /** Metadata filter. */
    filter?: Metadata;
    /** Verbosity flag. */
    verbose?: boolean;
    /**
     * The number of documents to chunk by when
     * adding vectors.
     * @default 500
     */
    chunkSize?: number;
    /** Array of ids. NOTE(review): how these relate to the `options.ids` of `addDocuments` / `addVectors` is not visible in this declaration. */
    ids?: string[];
    /** Distance strategy; one of the `DistanceStrategy` names. */
    distanceStrategy?: DistanceStrategy;
}
/**
 * Class that provides an interface to a Postgres vector database. It
 * extends the `VectorStore` base class and implements methods for adding
 * documents and vectors, performing similarity searches, and ensuring the
 * existence of a table in the database.
 */
export declare class PGVectorStore extends VectorStore {
    /** Type of the filter accepted by the search methods (a `Metadata` object); referenced as `this["FilterType"]`. */
    FilterType: Metadata;
    tableName: string;
    collectionTableName?: string;
    collectionName: string;
    collectionMetadata: Metadata | null;
    schemaName: string | null;
    idColumnName: string;
    vectorColumnName: string;
    contentColumnName: string;
    extensionSchemaName: string | null;
    metadataColumnName: string;
    filter?: Metadata;
    _verbose?: boolean;
    /** The `pg` pool held by this store. */
    pool: Pool;
    /** A `pg` pool client, if one is held. */
    client?: PoolClient;
    chunkSize: number;
    distanceStrategy?: DistanceStrategy;
    /** Returns a string. NOTE(review): presumably an identifier for this vector store type; confirm. */
    _vectorstoreType(): string;
    /**
     * @param embeddings - Embeddings instance.
     * @param config - `PGVectorStoreArgs` instance.
     */
    constructor(embeddings: EmbeddingsInterface, config: PGVectorStoreArgs);
    /** NOTE(review): presumably the table name qualified by `schemaName`; confirm against the implementation. */
    get computedTableName(): string;
    /** NOTE(review): presumably the collection table name qualified by `schemaName`; confirm against the implementation. */
    get computedCollectionTableName(): string;
    /** NOTE(review): presumably the SQL distance operator matching `distanceStrategy`; confirm against the implementation. */
    get computedOperatorString(): string;
    /**
     * Static method to create a new `PGVectorStore` instance from a
     * connection. It creates a table if one does not exist and returns
     * a new instance of `PGVectorStore`.
     *
     * @param embeddings - Embeddings instance.
     * @param config - `PGVectorStoreArgs` instance.
     * @returns A new instance of `PGVectorStore`.
     */
    static initialize(embeddings: EmbeddingsInterface, config: PGVectorStoreArgs): Promise<PGVectorStore>;
    /** NOTE(review): presumably sets up `client` from `pool`; the implementation is not visible here. */
    protected _initializeClient(): Promise<void>;
    /**
     * Method to add documents to the vector store. It converts the documents into
     * vectors, and adds them to the store.
     *
     * @param documents - Array of `Document` instances.
     * @param options - Optional arguments for adding documents
     * @param options.ids - Optional array of ids. NOTE(review): presumably one per document; confirm.
     * @returns Promise that resolves when the documents have been added.
     */
    addDocuments(documents: Document[], options?: {
        ids?: string[];
    }): Promise<void>;
    /**
     * Inserts a row for the collectionName provided at initialization if it does not
     * exist and returns the collectionId.
     *
     * @returns The collectionId for the given collectionName.
     */
    getOrCreateCollection(): Promise<string>;
    /**
     * Generates the SQL placeholders for a specific row at the provided index.
     *
     * @param index - The index of the row for which placeholders need to be generated.
     * @param numOfColumns - The number of columns we are inserting data into.
     * @returns The SQL placeholders for the row values.
     */
    private generatePlaceholderForRowAt;
    /**
     * Constructs the SQL query for inserting rows into the specified table.
     *
     * @param rows - The rows of data to be inserted, consisting of values and records.
     * @param chunkIndex - The starting index for generating query placeholders based on chunk positioning.
     * @returns The complete SQL INSERT INTO query string.
     */
    private buildInsertQuery;
    /**
     * Method to add vectors to the vector store. It converts the vectors into
     * rows and inserts them into the database.
     *
     * @param vectors - Array of vectors.
     * @param documents - Array of `Document` instances.
     * @param options - Optional arguments for adding documents
     * @param options.ids - Optional array of ids. NOTE(review): presumably one per vector; confirm.
     * @returns Promise that resolves when the vectors have been added.
     */
    addVectors(vectors: number[][], documents: Document[], options?: {
        ids?: string[];
    }): Promise<void>;
    /**
     * Method to delete documents from the vector store. It deletes the
     * documents that match the provided ids.
     *
     * @param ids - Array of document ids.
     * @returns Promise that resolves when the documents have been deleted.
     */
    private deleteById;
    /**
     * Method to delete documents from the vector store. It deletes the
     * documents whose metadata contains the filter.
     *
     * @param filter - An object representing the Metadata filter.
     * @returns Promise that resolves when the documents have been deleted.
     */
    private deleteByFilter;
    /**
     * Method to delete documents from the vector store. It deletes the
     * documents that match the provided ids or metadata filter. Ids are
     * matched exactly; the metadata filter is matched according to Postgres
     * jsonb containment. Ids and filter are mutually exclusive.
     *
     * @param params - Object containing either an array of ids or a metadata filter object.
     * @returns Promise that resolves when the documents have been deleted.
     * @throws Error if neither ids nor filter are provided, or if both are provided.
     * @example <caption>Delete by ids</caption>
     * await vectorStore.delete({ ids: ["id1", "id2"] });
     * @example <caption>Delete by filter</caption>
     * await vectorStore.delete({ filter: { a: 1, b: 2 } });
     */
    delete(params: {
        ids?: string[];
        filter?: Metadata;
    }): Promise<void>;
    /**
     * Method to perform a similarity search in the vector store. It returns
     * the `k` most similar documents to the query vector, along with their
     * similarity scores.
     *
     * @param query - Query vector.
     * @param k - Number of most similar documents to return.
     * @param filter - Optional filter to apply to the search.
     * @returns Promise that resolves with an array of tuples, each containing a `Document` and its similarity score.
     */
    similaritySearchVectorWithScore(query: number[], k: number, filter?: this["FilterType"]): Promise<[Document, number][]>;
    /**
     * Method to ensure the existence of the table in the database. It creates
     * the table if it does not already exist.
     *
     * @returns Promise that resolves when the table has been ensured.
     */
    ensureTableInDatabase(): Promise<void>;
    /**
     * Method to ensure the existence of the collection table in the database.
     * It creates the table if it does not already exist.
     *
     * @returns Promise that resolves when the collection table has been ensured.
     */
    ensureCollectionTableInDatabase(): Promise<void>;
    /**
     * Static method to create a new `PGVectorStore` instance from an
     * array of texts and their metadata. It converts the texts into
     * `Document` instances and adds them to the store.
     *
     * @param texts - Array of texts.
     * @param metadatas - Array of metadata objects or a single metadata object.
     * @param embeddings - Embeddings instance.
     * @param dbConfig - `PGVectorStoreArgs` instance.
     * @returns Promise that resolves with a new instance of `PGVectorStore`.
     */
    static fromTexts(texts: string[], metadatas: object[] | object, embeddings: EmbeddingsInterface, dbConfig: PGVectorStoreArgs): Promise<PGVectorStore>;
    /**
     * Static method to create a new `PGVectorStore` instance from an
     * array of `Document` instances. It adds the documents to the store.
     *
     * @param docs - Array of `Document` instances.
     * @param embeddings - Embeddings instance.
     * @param dbConfig - `PGVectorStoreArgs` instance.
     * @returns Promise that resolves with a new instance of `PGVectorStore`.
     */
    static fromDocuments(docs: Document[], embeddings: EmbeddingsInterface, dbConfig: PGVectorStoreArgs): Promise<PGVectorStore>;
    /**
     * Closes all the clients in the pool and terminates the pool.
     *
     * @returns Promise that resolves when all clients are closed and the pool is terminated.
     */
    end(): Promise<void>;
    /**
     * Method to create the HNSW index on the vector column.
     *
     * All options are passed as properties of a single `config` object.
     *
     * @param config - Index creation options.
     * @param config.dimensions - Defines the number of dimensions in your vector data type, up to 2000. For example, use 1536 for OpenAI's text-embedding-ada-002 and Amazon's amazon.titan-embed-text-v1 models.
     * @param config.m - The max number of connections per layer (16 by default). Index build time improves with smaller values, while higher values can speed up search queries.
     * @param config.efConstruction - The size of the dynamic candidate list for constructing the graph (64 by default). A higher value can potentially improve the index quality at the cost of index build time.
     * @param config.distanceFunction - The name of the distance function to use; when omitted it is selected automatically based on the `distanceStrategy`.
     * @returns Promise that resolves with no value (`Promise<void>`). NOTE(review): presumably settles once the index creation query has run; confirm against the implementation.
     */
    createHnswIndex(config: {
        dimensions: number;
        m?: number;
        efConstruction?: number;
        distanceFunction?: string;
    }): Promise<void>;
}
export {};