UNPKG

@genkit-ai/dev-local-vectorstore

Version:

Genkit AI framework plugin for temporary local vector database.

165 lines 5.84 kB
"use strict"; var __create = Object.create; var __defProp = Object.defineProperty; var __getOwnPropDesc = Object.getOwnPropertyDescriptor; var __getOwnPropNames = Object.getOwnPropertyNames; var __getProtoOf = Object.getPrototypeOf; var __hasOwnProp = Object.prototype.hasOwnProperty; var __export = (target, all) => { for (var name in all) __defProp(target, name, { get: all[name], enumerable: true }); }; var __copyProps = (to, from, except, desc) => { if (from && typeof from === "object" || typeof from === "function") { for (let key of __getOwnPropNames(from)) if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); } return to; }; var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps( // If the importer is in node compatibility mode or this is not an ESM // file that has been converted to a CommonJS file using a Babel- // compatible transform (i.e. "__esModule" has not been set), then set // "default" to the CommonJS "module.exports" for node compatibility. isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target, mod )); var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); var index_exports = {}; __export(index_exports, { default: () => index_default, devLocalIndexerRef: () => devLocalIndexerRef, devLocalRetrieverRef: () => devLocalRetrieverRef, devLocalVectorstore: () => devLocalVectorstore }); module.exports = __toCommonJS(index_exports); var import_compute_cosine_similarity = __toESM(require("compute-cosine-similarity")); var fs = __toESM(require("fs")); var import_genkit = require("genkit"); var import_plugin = require("genkit/plugin"); var import_retriever = require("genkit/retriever"); var import_ts_md5 = require("ts-md5"); const _LOCAL_FILESTORE = "__db_{INDEX_NAME}.json"; function loadFilestore(indexName) { let existingData = {}; const indexFileName = _LOCAL_FILESTORE.replace("{INDEX_NAME}", indexName); if (fs.existsSync(indexFileName)) { existingData = JSON.parse(fs.readFileSync(indexFileName).toString()); } return existingData; } function addDocument(embedding, doc, contents) { const id = import_ts_md5.Md5.hashStr(JSON.stringify(doc)); if (!(id in contents)) { contents[id] = { doc, embedding }; } else { console.debug(`Skipping ${id} since it is already present`); } } function devLocalVectorstore(params) { return (0, import_plugin.genkitPlugin)("devLocalVectorstore", async (ai) => { params.map((p) => configureDevLocalRetriever(ai, p)); params.map((p) => configureDevLocalIndexer(ai, p)); }); } var index_default = devLocalVectorstore; function devLocalRetrieverRef(indexName) { return (0, import_retriever.retrieverRef)({ name: `devLocalVectorstore/${indexName}`, info: { label: `Local file-based Retriever - ${indexName}` }, configSchema: import_retriever.CommonRetrieverOptionsSchema.optional() }); } function devLocalIndexerRef(indexName) { return (0, import_retriever.indexerRef)({ name: `devLocalVectorstore/${indexName}`, info: { label: `Local file-based Indexer - ${indexName}` }, configSchema: import_genkit.z.null().optional() }); } async function importDocumentsToLocalVectorstore(ai, params) { const { docs, embedder, embedderOptions } = { ...params }; const data = loadFilestore(params.indexName); await Promise.all( docs.map(async (doc) => { const embeddings = await ai.embed({ embedder, content: doc, options: embedderOptions }); const embeddingDocs = doc.getEmbeddingDocuments(embeddings); for (const i in embeddingDocs) { addDocument(embeddings[i], embeddingDocs[i], data); } }) ); fs.writeFileSync( _LOCAL_FILESTORE.replace("{INDEX_NAME}", params.indexName), JSON.stringify(data, null, 2) ); } async function getClosestDocuments(params) { const scoredDocs = []; for (const value of Object.values(params.db)) { const thisEmbedding = value.embedding.embedding; const score = (0, import_compute_cosine_similarity.default)(params.queryEmbeddings, thisEmbedding) ?? 0; scoredDocs.push({ score, doc: new import_retriever.Document(value.doc) }); } scoredDocs.sort((a, b) => a.score > b.score ? -1 : 1); return scoredDocs.slice(0, params.k).map((o) => o.doc); } function configureDevLocalRetriever(ai, params) { const { embedder, embedderOptions } = params; const vectorstore = ai.defineRetriever( { name: `devLocalVectorstore/${params.indexName}`, configSchema: import_retriever.CommonRetrieverOptionsSchema }, async (content, options) => { const db = loadFilestore(params.indexName); const embeddings = await ai.embed({ embedder, content, options: embedderOptions }); return { documents: await getClosestDocuments({ k: options?.k ?? 3, queryEmbeddings: embeddings[0].embedding, db }) }; } ); return vectorstore; } function configureDevLocalIndexer(ai, params) { const { embedder, embedderOptions } = params; const vectorstore = ai.defineIndexer( { name: `devLocalVectorstore/${params.indexName}` }, async (docs) => { await importDocumentsToLocalVectorstore(ai, { indexName: params.indexName, docs, embedder, embedderOptions }); } ); return vectorstore; } // Annotate the CommonJS export names for ESM import in node: 0 && (module.exports = { devLocalIndexerRef, devLocalRetrieverRef, devLocalVectorstore }); //# sourceMappingURL=index.js.map