UNPKG

@langchain/community

Version:
323 lines (322 loc) 13.1 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.VercelPostgres = void 0;
const postgres_1 = require("@vercel/postgres");
const vectorstores_1 = require("@langchain/core/vectorstores");
const documents_1 = require("@langchain/core/documents");
const env_1 = require("@langchain/core/utils/env");
/**
 * Class that provides an interface to a Vercel Postgres vector database. It
 * extends the `VectorStore` base class and implements methods for adding
 * documents and vectors, deleting rows and performing similarity searches.
 */
class VercelPostgres extends vectorstores_1.VectorStore {
    _vectorstoreType() {
        return "vercel";
    }
    /**
     * Stores the connection handles and table/column names on the instance.
     * No query is issued here; use `VercelPostgres.initialize` to also connect
     * and create the table.
     *
     * @param embeddings - Embeddings instance.
     * @param config - Configuration: `tableName`, `columns`, `filter`, `pool`,
     * `client` and `verbose` are read.
     */
    constructor(embeddings, config) {
        super(embeddings, config);
        // Declare the instance fields up front, in a fixed order, before any
        // of them is assigned.
        const fieldNames = [
            "tableName",
            "idColumnName",
            "vectorColumnName",
            "contentColumnName",
            "metadataColumnName",
            "filter",
            "_verbose",
            "pool",
            "client",
        ];
        for (const fieldName of fieldNames) {
            Object.defineProperty(this, fieldName, {
                enumerable: true,
                configurable: true,
                writable: true,
                value: void 0
            });
        }
        this.tableName = config.tableName ?? "langchain_vectors";
        this.filter = config.filter;
        this.vectorColumnName = config.columns?.vectorColumnName ?? "embedding";
        this.contentColumnName = config.columns?.contentColumnName ?? "text";
        this.idColumnName = config.columns?.idColumnName ?? "id";
        this.metadataColumnName = config.columns?.metadataColumnName ?? "metadata";
        this.pool = config.pool;
        this.client = config.client;
        // The environment variable wins when it is set; otherwise fall back to
        // `config.verbose`. (A `=== "true"` comparison is always a boolean, so
        // chaining `??` directly onto it would never reach the fallback.)
        const verboseEnv = (0, env_1.getEnvironmentVariable)("LANGCHAIN_VERBOSE");
        this._verbose = verboseEnv != null ? verboseEnv === "true" : !!config.verbose;
    }
    /**
     * Static method to create a new `VercelPostgres` instance from a
     * connection. It reuses `config.pool` / `config.client` when given,
     * otherwise creates a pool and checks a client out of it, then creates
     * the table if one does not exist.
     *
     * @param embeddings - Embeddings instance.
     * @param config - `VercelPostgres` configuration options.
     * @returns A new instance of `VercelPostgres`.
     */
    static async initialize(embeddings, config) {
        // Default maxUses to 1 for edge environments:
        // https://github.com/vercel/storage/tree/main/packages/postgres#a-note-on-edge-environments
        const pool = config?.pool ??
            (0, postgres_1.createPool)({ maxUses: 1, ...config?.postgresConnectionOptions });
        const providedClient = config?.client;
        const client = providedClient ?? (await pool.connect());
        const postgresqlVectorStore = new VercelPostgres(embeddings, {
            ...config,
            pool,
            client,
        });
        try {
            await postgresqlVectorStore.ensureTableInDatabase();
        }
        catch (e) {
            // Only hand back a client that was checked out here; a
            // caller-supplied client remains the caller's responsibility.
            if (providedClient == null) {
                client.release();
            }
            throw e;
        }
        return postgresqlVectorStore;
    }
    /**
     * Method to add documents to the vector store. It embeds the page content
     * of each document and delegates to `addVectors`.
     *
     * @param documents - Array of `Document` instances.
     * @param options - Optional settings; `options.ids` supplies row ids.
     * @returns Promise that resolves with the ids of the written rows.
     */
    async addDocuments(documents, options) {
        const texts = documents.map(({ pageContent }) => pageContent);
        const vectors = await this.embeddings.embedDocuments(texts);
        return this.addVectors(vectors, documents, options);
    }
    /**
     * Generates the SQL placeholders for a specific row at the provided index.
     * Placeholders are numbered consecutively across rows, e.g. the row at
     * index 1 with three values yields `($4,$5,$6)`.
     *
     * @param row - The values of the row; only its length is used.
     * @param index - The index of the row for which placeholders need to be generated.
     * @returns The SQL placeholders for the row values.
     */
    generatePlaceholderForRowAt(
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    row, index) {
        const base = index * row.length;
        const placeholders = row.map((_, j) => `$${base + 1 + j}`);
        return `(${placeholders.join(",")})`;
    }
    /**
     * Builds and executes a single multi-row `INSERT` for the given rows. On
     * an id conflict the content, vector and metadata columns are overwritten
     * with the new values.
     *
     * @param rows - The rows to insert; each row is an array of column values,
     * with the id first when `useIdColumn` is true.
     * @param useIdColumn - Whether each row starts with an explicit id value.
     * @returns Promise resolving to the query result, whose rows carry the id
     * column of every written row.
     */
    async runInsertQuery(
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    rows, useIdColumn) {
        const values = rows.map((row, j) => this.generatePlaceholderForRowAt(row, j));
        const flatValues = rows.flat();
        return this.client.query(`
      INSERT INTO ${this.tableName} (
        ${useIdColumn ? `${this.idColumnName},` : ""}
        ${this.contentColumnName},
        ${this.vectorColumnName},
        ${this.metadataColumnName}
      ) VALUES ${values.join(", ")}
      ON CONFLICT (${this.idColumnName})
      DO UPDATE
      SET
        ${this.contentColumnName} = EXCLUDED.${this.contentColumnName},
        ${this.vectorColumnName} = EXCLUDED.${this.vectorColumnName},
        ${this.metadataColumnName} = EXCLUDED.${this.metadataColumnName}
      RETURNING ${this.idColumnName}`, flatValues);
    }
    /**
     * Method to add vectors to the vector store. It converts the vectors into
     * rows and inserts them into the database in chunks of 500 rows.
     *
     * @param vectors - Array of vectors.
     * @param documents - Array of `Document` instances, parallel to `vectors`.
     * @param options - Optional settings; `options.ids`, when provided, must
     * have one id per vector.
     * @returns Promise that resolves with the ids of the written rows.
     * @throws Error if `options.ids` has a different length than `vectors`, or
     * if an insert fails (the underlying error is attached as `cause`).
     */
    async addVectors(vectors, documents, options) {
        const providedIds = options?.ids;
        // Treat both `undefined` and `null` as "no ids supplied".
        const hasIds = providedIds != null;
        if (hasIds && providedIds.length !== vectors.length) {
            throw new Error(`If provided, the length of "ids" must be the same as the number of vectors.`);
        }
        const rows = vectors.map((embedding, idx) => {
            const embeddingString = `[${embedding.join(",")}]`;
            const row = [
                documents[idx].pageContent,
                embeddingString,
                documents[idx].metadata,
            ];
            return hasIds ? [providedIds[idx], ...row] : row;
        });
        const chunkSize = 500;
        const insertedIds = [];
        for (let i = 0; i < rows.length; i += chunkSize) {
            const chunk = rows.slice(i, i + chunkSize);
            try {
                const result = await this.runInsertQuery(chunk, hasIds);
                insertedIds.push(...result.rows.map((row) => row[this.idColumnName]));
            }
            catch (e) {
                console.error(e);
                // Keep the original error reachable for callers.
                throw new Error(`Error inserting: ${e.message}`, { cause: e });
            }
        }
        return insertedIds;
    }
    /**
     * Method to perform a similarity search in the vector store. It returns
     * up to `k` rows ordered by ascending `<=>` distance to the query vector,
     * each paired with that distance.
     *
     * Filter entries are matched against the metadata column as text: a
     * scalar value becomes an equality test, and an object of the form
     * `{ in: [...] }` becomes an `IN (...)` test. All entries are combined
     * with `AND`.
     *
     * @param query - Query vector.
     * @param k - Number of most similar documents to return.
     * @param filter - Optional filter to apply to the search. Falls back to
     * the `filter` given at construction time when omitted.
     * @returns Promise that resolves with an array of tuples, each containing
     * a `Document` and its distance to the query (lower is closer).
     * @throws Error if an object-valued filter entry has no `in` array.
     */
    async similaritySearchVectorWithScore(query, k, filter) {
        const embeddingString = `[${query.join(",")}]`;
        const _filter = filter ?? this.filter ?? {};
        const whereClauses = [];
        // $1 is the query vector and $2 the limit; filter values follow.
        const values = [embeddingString, k];
        for (const [key, value] of Object.entries(_filter)) {
            // The key ends up inside a SQL string literal, so embedded single
            // quotes have to be doubled to keep the literal well-formed.
            const field = `${this.metadataColumnName}->>'${key.replace(/'/g, "''")}'`;
            if (typeof value === "object" && value !== null) {
                if (!Array.isArray(value.in)) {
                    throw new Error(`Invalid filter for metadata key "${key}": object filters must provide an "in" array.`);
                }
                if (value.in.length === 0) {
                    // `IN ()` is not valid SQL; an empty list can match nothing.
                    whereClauses.push("FALSE");
                    continue;
                }
                const offset = values.length;
                const placeholders = value.in
                    .map((_, index) => `$${offset + index + 1}`)
                    .join(",");
                whereClauses.push(`${field} IN (${placeholders})`);
                values.push(...value.in);
            }
            else {
                values.push(value);
                whereClauses.push(`${field} = $${values.length}`);
            }
        }
        const whereClause = whereClauses.length
            ? `WHERE ${whereClauses.join(" AND ")}`
            : "";
        const queryString = `
      SELECT *, ${this.vectorColumnName} <=> $1 as "_distance"
      FROM ${this.tableName}
      ${whereClause}
      ORDER BY "_distance" ASC
      LIMIT $2;`;
        const documents = (await this.client.query(queryString, values)).rows;
        const results = [];
        for (const doc of documents) {
            // Rows lacking a distance or content are skipped.
            if (doc._distance != null && doc[this.contentColumnName] != null) {
                const document = new documents_1.Document({
                    pageContent: doc[this.contentColumnName],
                    metadata: doc[this.metadataColumnName],
                });
                results.push([document, doc._distance]);
            }
        }
        return results;
    }
    /**
     * Deletes rows from the table: the rows whose id is listed in
     * `params.ids`, or every row when `params.deleteAll` is set and no ids
     * are given. Does nothing when neither is provided or `ids` is empty.
     *
     * @param params - `{ ids }` to delete specific rows, or `{ deleteAll }`
     * to truncate the table.
     * @returns Promise that resolves when the deletion has completed.
     */
    async delete(params) {
        if (params.ids !== undefined) {
            if (params.ids.length === 0) {
                // `IN ()` is not valid SQL, and there is nothing to delete.
                return;
            }
            const placeholders = params.ids.map((_, idx) => `$${idx + 1}`).join(",");
            await this.client.query(`DELETE FROM ${this.tableName} WHERE ${this.idColumnName} IN (${placeholders})`, params.ids);
        }
        else if (params.deleteAll) {
            await this.client.query(`TRUNCATE TABLE ${this.tableName}`);
        }
    }
    /**
     * Method to ensure the existence of the table in the database. It enables
     * the `vector` and `uuid-ossp` extensions and creates the table if it does
     * not already exist.
     *
     * NOTE(review): identifiers are double-quoted here but left unquoted in
     * the insert, search and delete queries, so a table or column name that
     * is not plain lowercase may resolve differently between them. Confirm
     * against PostgreSQL identifier rules before relying on such names.
     *
     * @returns Promise that resolves when the table has been ensured.
     */
    async ensureTableInDatabase() {
        await this.client.query(`CREATE EXTENSION IF NOT EXISTS vector;`);
        await this.client.query(`CREATE EXTENSION IF NOT EXISTS "uuid-ossp";`);
        await this.client.query(`CREATE TABLE IF NOT EXISTS "${this.tableName}" (
      "${this.idColumnName}" uuid NOT NULL DEFAULT uuid_generate_v4() PRIMARY KEY,
      "${this.contentColumnName}" text,
      "${this.metadataColumnName}" jsonb,
      "${this.vectorColumnName}" vector
    );`);
    }
    /**
     * Static method to create a new `VercelPostgres` instance from an
     * array of texts and their metadata. It converts the texts into
     * `Document` instances and adds them to the store.
     *
     * @param texts - Array of texts.
     * @param metadatas - Array of metadata objects (one per text) or a single
     * metadata object shared by all texts.
     * @param embeddings - Embeddings instance.
     * @param dbConfig - `VercelPostgres` configuration options.
     * @returns Promise that resolves with a new instance of `VercelPostgres`.
     */
    static async fromTexts(texts, metadatas, embeddings, dbConfig) {
        const docs = texts.map((text, i) => new documents_1.Document({
            pageContent: text,
            metadata: Array.isArray(metadatas) ? metadatas[i] : metadatas,
        }));
        return this.fromDocuments(docs, embeddings, dbConfig);
    }
    /**
     * Static method to create a new `VercelPostgres` instance from an
     * array of `Document` instances. It adds the documents to the store.
     *
     * @param docs - Array of `Document` instances.
     * @param embeddings - Embeddings instance.
     * @param dbConfig - `VercelPostgres` configuration options.
     * @returns Promise that resolves with a new instance of `VercelPostgres`.
     */
    static async fromDocuments(docs, embeddings, dbConfig) {
        const instance = await this.initialize(embeddings, dbConfig);
        await instance.addDocuments(docs);
        return instance;
    }
    /**
     * Releases this instance's client and then ends the pool. Either step is
     * skipped when the corresponding handle was never set.
     *
     * @returns Promise that resolves when the client is released and the pool has ended.
     */
    async end() {
        await this.client?.release();
        return this.pool?.end();
    }
}
exports.VercelPostgres = VercelPostgres;