UNPKG

@genkit-ai/dev-local-vectorstore

Version:

Genkit AI framework plugin for a temporary local vector database.

134 lines 3.94 kB
import similarity from "compute-cosine-similarity";
import * as fs from "fs";
import { z } from "genkit";
import { genkitPlugin } from "genkit/plugin";
import {
  CommonRetrieverOptionsSchema,
  Document,
  indexerRef,
  retrieverRef
} from "genkit/retriever";
import { Md5 } from "ts-md5";

// File-name template for the on-disk store; "{INDEX_NAME}" is replaced with
// the index name. The path is relative, so Node resolves it against the
// process's current working directory.
const _LOCAL_FILESTORE = "__db_{INDEX_NAME}.json";

/**
 * Builds the store file name for an index.
 *
 * @param {string} indexName - Name of the index.
 * @returns {string} File name derived from the `_LOCAL_FILESTORE` template.
 */
function filestorePath(indexName) {
  return _LOCAL_FILESTORE.replace("{INDEX_NAME}", indexName);
}

/**
 * Loads the persisted contents of an index from disk.
 *
 * @param {string} indexName - Name of the index to load.
 * @returns {object} The parsed JSON contents of the store file, or an empty
 *   object when the file does not exist yet.
 * @throws {SyntaxError} If the file exists but does not contain valid JSON.
 */
function loadFilestore(indexName) {
  const indexFileName = filestorePath(indexName);
  if (!fs.existsSync(indexFileName)) {
    return {};
  }
  return JSON.parse(fs.readFileSync(indexFileName).toString());
}

/**
 * Adds a document and its embedding to the in-memory store, keyed by the MD5
 * hash of the document's JSON serialization. An entry whose key is already
 * present is left untouched.
 *
 * @param {object} embedding - Embedding to store alongside the document.
 * @param {object} doc - Document to store.
 * @param {object} contents - Store object; mutated in place.
 */
function addDocument(embedding, doc, contents) {
  const id = Md5.hashStr(JSON.stringify(doc));
  if (id in contents) {
    console.debug(`Skipping ${id} since it is already present`);
    return;
  }
  contents[id] = { doc, embedding };
}

/**
 * Genkit plugin that registers one retriever and one indexer per entry in
 * `params`, each backed by a local JSON file.
 *
 * @param {Array<object>} params - One configuration per index; each entry is
 *   read for `indexName`, `embedder` and `embedderOptions`.
 * @returns {*} The value returned by `genkitPlugin`.
 */
function devLocalVectorstore(params) {
  return genkitPlugin("devLocalVectorstore", async (ai) => {
    // Register every retriever first, then every indexer, in input order.
    for (const p of params) {
      configureDevLocalRetriever(ai, p);
    }
    for (const p of params) {
      configureDevLocalIndexer(ai, p);
    }
  });
}

const index_default = devLocalVectorstore;

/**
 * Creates a reference to the retriever registered for `indexName`.
 *
 * @param {string} indexName - Name of the index.
 * @returns {*} The value returned by `retrieverRef`.
 */
function devLocalRetrieverRef(indexName) {
  return retrieverRef({
    name: `devLocalVectorstore/${indexName}`,
    info: {
      label: `Local file-based Retriever - ${indexName}`
    },
    configSchema: CommonRetrieverOptionsSchema.optional()
  });
}

/**
 * Creates a reference to the indexer registered for `indexName`.
 *
 * @param {string} indexName - Name of the index.
 * @returns {*} The value returned by `indexerRef`.
 */
function devLocalIndexerRef(indexName) {
  return indexerRef({
    name: `devLocalVectorstore/${indexName}`,
    info: {
      label: `Local file-based Indexer - ${indexName}`
    },
    configSchema: z.null().optional()
  });
}

/**
 * Embeds the given documents and merges them into the on-disk store of an
 * index, then rewrites the store file.
 *
 * @param {object} ai - Genkit instance; its `embed` method is called once per
 *   document.
 * @param {object} params
 * @param {string} params.indexName - Index whose store file is updated.
 * @param {Array<object>} params.docs - Documents to index; each must provide
 *   `getEmbeddingDocuments(embeddings)`.
 * @param {*} params.embedder - Embedder passed through to `ai.embed`.
 * @param {*} [params.embedderOptions] - Options passed through to `ai.embed`.
 * @returns {Promise<void>}
 */
async function importDocumentsToLocalVectorstore(ai, params) {
  const { indexName, docs, embedder, embedderOptions } = params;
  const data = loadFilestore(indexName);
  // Documents are embedded concurrently; all of them add into the same
  // in-memory `data` object, which is written out once at the end.
  await Promise.all(
    docs.map(async (doc) => {
      const embeddings = await ai.embed({
        embedder,
        content: doc,
        options: embedderOptions
      });
      const embeddingDocs = doc.getEmbeddingDocuments(embeddings);
      // embeddings[i] is stored with embeddingDocs[i].
      for (const [i, embeddingDoc] of embeddingDocs.entries()) {
        addDocument(embeddings[i], embeddingDoc, data);
      }
    })
  );
  fs.writeFileSync(filestorePath(indexName), JSON.stringify(data, null, 2));
}

/**
 * Ranks every stored document by cosine similarity to the query embedding
 * and returns the best matches.
 *
 * @param {object} params
 * @param {number} params.k - Maximum number of documents to return.
 * @param {Array<number>} params.queryEmbeddings - Query embedding vector.
 * @param {object} params.db - Store contents as returned by `loadFilestore`.
 * @returns {Promise<Array<Document>>} Up to `k` documents, highest score
 *   first.
 */
async function getClosestDocuments(params) {
  const scoredDocs = Object.values(params.db).map((value) => ({
    // A null/undefined similarity result is scored as 0.
    score: similarity(params.queryEmbeddings, value.embedding.embedding) ?? 0,
    doc: new Document(value.doc)
  }));
  // Descending by score. The numeric difference returns 0 for equal scores,
  // so the comparator is consistent and ties keep their store order.
  scoredDocs.sort((a, b) => b.score - a.score);
  return scoredDocs.slice(0, params.k).map((o) => o.doc);
}

/**
 * Registers the retriever for one index on the Genkit instance.
 *
 * @param {object} ai - Genkit instance.
 * @param {object} params - Index configuration (`indexName`, `embedder`,
 *   `embedderOptions`).
 * @returns {*} The value returned by `ai.defineRetriever`.
 */
function configureDevLocalRetriever(ai, params) {
  const { embedder, embedderOptions } = params;
  return ai.defineRetriever(
    {
      name: `devLocalVectorstore/${params.indexName}`,
      configSchema: CommonRetrieverOptionsSchema
    },
    async (content, options) => {
      // The store is re-read from disk on every query.
      const db = loadFilestore(params.indexName);
      const embeddings = await ai.embed({
        embedder,
        content,
        options: embedderOptions
      });
      return {
        documents: await getClosestDocuments({
          k: options?.k ?? 3, // return 3 documents when the caller gives no k
          queryEmbeddings: embeddings[0].embedding,
          db
        })
      };
    }
  );
}

/**
 * Registers the indexer for one index on the Genkit instance.
 *
 * @param {object} ai - Genkit instance.
 * @param {object} params - Index configuration (`indexName`, `embedder`,
 *   `embedderOptions`).
 * @returns {*} The value returned by `ai.defineIndexer`.
 */
function configureDevLocalIndexer(ai, params) {
  const { embedder, embedderOptions } = params;
  return ai.defineIndexer(
    {
      name: `devLocalVectorstore/${params.indexName}`
    },
    async (docs) => {
      await importDocumentsToLocalVectorstore(ai, {
        indexName: params.indexName,
        docs,
        embedder,
        embedderOptions
      });
    }
  );
}

export {
  index_default as default,
  devLocalIndexerRef,
  devLocalRetrieverRef,
  devLocalVectorstore
};
//# sourceMappingURL=index.mjs.map