UNPKG

mongodb-rag-core

Version:

Common elements used by MongoDB Chatbot Framework components.

144 lines 6.16 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.makeMongoDbEmbeddedContentStore = void 0;
const _1 = require(".");
const MongoDbDatabaseConnection_1 = require("../MongoDbDatabaseConnection");
const assert_1 = require("assert");
const mongodb_1 = require("mongodb");

/**
 Creates an embedded-content store on top of a MongoDB collection.

 The returned object exposes the connection's `drop`/`close` functions, a
 `metadata` record, CRUD-style helpers for embedded content, a
 `$vectorSearch`-based nearest-neighbor query, and an `init()` that creates
 the collection's indexes.

 @param params.connectionUri - Forwarded to `makeMongoDbDatabaseConnection`.
 @param params.databaseName - Forwarded to `makeMongoDbDatabaseConnection`.
 @param params.searchIndex.embeddingName - Name of the embedding; the vector
   is read from / indexed at the path `embeddings.<embeddingName>`.
 @param params.searchIndex.numDimensions - Vector dimensions used in the
   search index definition. Defaults to 1536.
 @param params.searchIndex.filters - Extra field definitions appended to the
   search index definition. Defaults to a single filter on `sourceName`.
 @param params.searchIndex.name - Search index name. Defaults to
   "vector_index".
 @param params.collectionName - Collection holding the embedded content.
   Defaults to "embedded_content".
 */
function makeMongoDbEmbeddedContentStore({
  connectionUri,
  databaseName,
  searchIndex: {
    embeddingName,
    numDimensions = 1536,
    filters = [
      {
        type: "filter",
        path: "sourceName",
      },
    ],
    name = "vector_index",
  },
  collectionName = "embedded_content",
}) {
  const { mongoClient, db, drop, close } = (0,
  MongoDbDatabaseConnection_1.makeMongoDbDatabaseConnection)({
    connectionUri,
    databaseName,
  });
  const embeddedContentCollection = db.collection(collectionName);
  const embeddingPath = `embeddings.${embeddingName}`;

  // `pageIdentity` is looked up on the module object at call time (not
  // captured at load time), matching the compiler-emitted access pattern.
  const pageFilter = (page) => (0, _1.pageIdentity)(page);

  return {
    drop,
    close,
    metadata: {
      databaseName,
      collectionName,
      embeddingName,
      embeddingPath,
    },

    /**
     Loads every embedded-content document matching the given page's
     identity filter.
     */
    async loadEmbeddedContent({ page }) {
      return await embeddedContentCollection.find(pageFilter(page)).toArray();
    },

    /**
     Deletes embedded content matching the page identity and/or data sources.

     @param args.page - When provided, restricts deletion to that page.
     @param args.dataSources - When provided, restricts deletion by
       `sourceName`.
     @param args.inverseDataSources - When true, `dataSources` is matched
       with `$nin` instead of `$in`. Defaults to false.
     @throws Error when the delete is not acknowledged.
     */
    async deleteEmbeddedContent({
      page,
      dataSources,
      inverseDataSources = false,
    }) {
      const sourceOperator = inverseDataSources ? "$nin" : "$in";
      // NOTE: with neither `page` nor `dataSources` the filter is `{}`.
      // Spread order matters: a `sourceName` coming from `dataSources`
      // overrides any `sourceName` produced by the page identity.
      const deleteResult = await embeddedContentCollection.deleteMany({
        ...(page ? pageFilter(page) : undefined),
        ...(dataSources
          ? { sourceName: { [sourceOperator]: dataSources } }
          : undefined),
      });
      if (!deleteResult.acknowledged) {
        throw new Error("EmbeddedContent deletion not acknowledged!");
      }
    },

    /**
     Replaces all embedded content of a page inside a single transaction:
     existing documents for the page are deleted, then the new ones inserted.

     @param args.page - The page the content belongs to.
     @param args.embeddedContent - Non-empty list of documents; each must
       share the page's `sourceName` and `url`.
     @throws AssertionError when the list is empty or an entry does not match
       the page.
     @throws Error when the delete/insert is not acknowledged or the inserted
       count differs from the number of documents supplied.
     */
    async updateEmbeddedContent({ page, embeddedContent }) {
      (0, assert_1.strict)(embeddedContent.length !== 0);
      embeddedContent.forEach((chunk) => {
        (0, assert_1.strict)(
          chunk.sourceName === page.sourceName && chunk.url === page.url,
          `EmbeddedContent source/url (${chunk.sourceName} / ${chunk.url}) must match give page source/url (${page.sourceName} / ${page.url})!`
        );
      });
      await mongoClient.withSession(async (session) => {
        await session.withTransaction(async () => {
          // First delete all the embeddedContent for the given page
          const deleteResult = await embeddedContentCollection.deleteMany(
            pageFilter(page),
            { session }
          );
          if (!deleteResult.acknowledged) {
            throw new Error("EmbeddedContent deletion not acknowledged!");
          }
          // Insert the embedded content for the page
          const insertResult = await embeddedContentCollection.insertMany(
            [...embeddedContent],
            {
              session,
            }
          );
          if (!insertResult.acknowledged) {
            throw new Error("EmbeddedContent insertion not acknowledged!");
          }
          const { insertedCount } = insertResult;
          if (insertedCount !== embeddedContent.length) {
            throw new Error(
              `Expected ${embeddedContent.length} inserted, got ${insertedCount}`
            );
          }
        });
      });
    },

    /**
     Runs a `$vectorSearch` aggregation and keeps results whose score is at
     least `minScore`.

     @param vector - The vector to search for nearest neighbors to.
     @param options - Options for performing a nearest-neighbor search.
       Recognized keys: `indexName` (default: the store's index name), `path`
       (default: the store's embedding path), `k` (default 3), `minScore`
       (default 0), `filter` (default `{}`), `numCandidates` (default
       `k * 15`).
     @returns Promise of the matching documents, each with an added `score`
       field taken from the `vectorSearchScore` metadata.
     */
    async findNearestNeighbors(vector, options) {
      const {
        indexName,
        path,
        k,
        minScore,
        filter = {},
        numCandidates,
      } = {
        // Default options
        indexName: name,
        path: embeddingPath,
        k: 3,
        minScore: 0,
        // User options override
        ...(options ?? {}),
      };
      return embeddedContentCollection
        .aggregate([
          {
            $vectorSearch: {
              index: indexName,
              queryVector: vector,
              path,
              limit: k,
              numCandidates: numCandidates ?? k * 15,
              filter,
            },
          },
          {
            $addFields: {
              score: {
                $meta: "vectorSearchScore",
              },
            },
          },
          { $match: { score: { $gte: minScore } } },
        ])
        .toArray();
    },

    /**
     Creates the `sourceName` and `url` indexes and the vector search index.

     A `MongoServerError` whose `codeName` is "IndexAlreadyExists" is
     ignored; any other `MongoServerError` fails an assertion, and any other
     error is rethrown unchanged.
     */
    async init() {
      await embeddedContentCollection.createIndex({ sourceName: 1 });
      await embeddedContentCollection.createIndex({ url: 1 });
      try {
        const searchIndex = {
          name,
          type: "vectorSearch",
          definition: {
            fields: [
              {
                numDimensions,
                path: embeddingPath,
                similarity: "cosine",
                type: "vector",
              },
              ...filters,
            ],
          },
        };
        await embeddedContentCollection.createSearchIndex(searchIndex);
      } catch (error) {
        if (error instanceof mongodb_1.MongoServerError) {
          (0, assert_1.strict)(
            error.codeName === "IndexAlreadyExists",
            `An unexpected MongoError occurred: ${error.name}`
          );
        } else {
          throw error;
        }
      }
    },
  };
}
exports.makeMongoDbEmbeddedContentStore = makeMongoDbEmbeddedContentStore;
//# sourceMappingURL=MongoDbEmbeddedContentStore.js.map