@genkit-ai/dev-local-vectorstore
Version:
Genkit AI framework plugin for a temporary local vector database.
134 lines • 3.94 kB
JavaScript
import similarity from "compute-cosine-similarity";
import * as fs from "fs";
import { z } from "genkit";
import { genkitPlugin } from "genkit/plugin";
import {
CommonRetrieverOptionsSchema,
Document,
indexerRef,
retrieverRef
} from "genkit/retriever";
import { Md5 } from "ts-md5";
// File name template for an index's on-disk store; "{INDEX_NAME}" is replaced
// with the index name. The path is relative, so it resolves against the
// process's current working directory.
const _LOCAL_FILESTORE = "__db_{INDEX_NAME}.json";

/**
 * Reads the persisted contents of a local index.
 *
 * @param {string} indexName - Name substituted into the filestore template.
 * @returns {object} The parsed JSON contents of the index file, or a fresh
 *   empty object when the file does not exist.
 * @throws {SyntaxError} When the file exists but does not contain valid JSON.
 */
function loadFilestore(indexName) {
  const storePath = _LOCAL_FILESTORE.replace("{INDEX_NAME}", indexName);
  if (!fs.existsSync(storePath)) {
    return {};
  }
  const serialized = fs.readFileSync(storePath).toString();
  return JSON.parse(serialized);
}
/**
 * Files a document and its embedding in `contents`, keyed by the MD5 hash of
 * the JSON-serialized document. If that key is already present the existing
 * entry is left untouched and a debug message is logged instead.
 *
 * @param {*} embedding - Embedding stored alongside the document.
 * @param {*} doc - Document to store; its JSON form determines the key.
 * @param {object} contents - Store object, mutated in place.
 * @returns {void}
 */
function addDocument(embedding, doc, contents) {
  const key = Md5.hashStr(JSON.stringify(doc));
  if (key in contents) {
    console.debug(`Skipping ${key} since it is already present`);
    return;
  }
  contents[key] = { doc, embedding };
}
/**
 * Creates the "devLocalVectorstore" plugin, which registers one retriever and
 * one indexer per configured index.
 *
 * @param {Array<object>} params - One configuration entry per index. Each
 *   entry is handed unchanged to configureDevLocalRetriever and
 *   configureDevLocalIndexer.
 * @returns {*} Whatever `genkitPlugin` returns for the plugin name and the
 *   initializer defined here.
 */
function devLocalVectorstore(params) {
  return genkitPlugin("devLocalVectorstore", async (ai) => {
    // forEach, not map: the configure calls run only for their side effects,
    // so there is no result array worth building. All retrievers are still
    // registered before any indexer.
    params.forEach((indexParams) => {
      configureDevLocalRetriever(ai, indexParams);
    });
    params.forEach((indexParams) => {
      configureDevLocalIndexer(ai, indexParams);
    });
  });
}

// Alias under which the plugin factory is exposed as the module's default
// export; it is never reassigned, hence const.
const index_default = devLocalVectorstore;
/**
 * Builds a retriever reference named `devLocalVectorstore/<indexName>`.
 *
 * @param {string} indexName - Index name interpolated into the reference's
 *   name and label.
 * @returns {*} The result of `retrieverRef` for that name, a descriptive
 *   label, and the optional form of `CommonRetrieverOptionsSchema` as the
 *   config schema.
 */
function devLocalRetrieverRef(indexName) {
  const name = `devLocalVectorstore/${indexName}`;
  const label = `Local file-based Retriever - ${indexName}`;
  const configSchema = CommonRetrieverOptionsSchema.optional();
  return retrieverRef({ name, info: { label }, configSchema });
}
/**
 * Builds an indexer reference named `devLocalVectorstore/<indexName>`.
 *
 * @param {string} indexName - Index name interpolated into the reference's
 *   name and label.
 * @returns {*} The result of `indexerRef` for that name, a descriptive label,
 *   and `z.null().optional()` as the config schema.
 */
function devLocalIndexerRef(indexName) {
  const name = `devLocalVectorstore/${indexName}`;
  const label = `Local file-based Indexer - ${indexName}`;
  const configSchema = z.null().optional();
  return indexerRef({ name, info: { label }, configSchema });
}
/**
 * Embeds the given documents and adds them to an index's local file store,
 * then rewrites the store file as pretty-printed JSON.
 *
 * Embedding requests run concurrently, but results are merged into the store
 * in the order of `params.docs`. The stored key order, and which entry wins
 * when two documents produce the same key, therefore do not depend on which
 * request finishes first. If any embedding request fails, the returned promise
 * rejects and the store file is not written.
 *
 * @param {object} ai - Object exposing `embed({ embedder, content, options })`.
 * @param {object} params
 * @param {string} params.indexName - Index whose store file is read and written.
 * @param {Array<object>} params.docs - Documents to index; each must provide
 *   `getEmbeddingDocuments(embeddings)`.
 * @param {*} params.embedder - Passed through to `ai.embed`.
 * @param {*} [params.embedderOptions] - Passed through to `ai.embed` as `options`.
 * @returns {Promise<void>}
 */
async function importDocumentsToLocalVectorstore(ai, params) {
  const { docs, embedder, embedderOptions, indexName } = params;
  const data = loadFilestore(indexName);

  // Promise.all keeps results in input order regardless of completion order.
  const embedded = await Promise.all(
    docs.map(async (doc) => {
      const embeddings = await ai.embed({
        embedder,
        content: doc,
        options: embedderOptions,
      });
      return { embeddings, embeddingDocs: doc.getEmbeddingDocuments(embeddings) };
    })
  );

  for (const { embeddings, embeddingDocs } of embedded) {
    // Pair each embedding document with the embedding at the same position.
    // forEach supplies numeric indices, unlike for...in, which yields string
    // keys and any extra enumerable properties.
    embeddingDocs.forEach((embeddingDoc, position) => {
      addDocument(embeddings[position], embeddingDoc, data);
    });
  }

  fs.writeFileSync(
    _LOCAL_FILESTORE.replace("{INDEX_NAME}", indexName),
    JSON.stringify(data, null, 2)
  );
}
/**
 * Ranks every stored entry by similarity to the query embedding and returns
 * the documents of the best `k`.
 *
 * @param {object} params
 * @param {object} params.db - Store contents; each value carries `doc` and
 *   `embedding.embedding` (the stored vector).
 * @param {Array<number>} params.queryEmbeddings - Query vector compared
 *   against each stored vector.
 * @param {number} [params.k] - Maximum number of documents to return; when
 *   undefined, all documents are returned.
 * @returns {Promise<Array<Document>>} Documents in descending score order.
 *   Entries with equal scores keep their order in `params.db`.
 */
async function getClosestDocuments(params) {
  const { db, k, queryEmbeddings } = params;

  const scoredDocs = Object.values(db).map((entry) => {
    const rawScore = similarity(queryEmbeddings, entry.embedding.embedding);
    return {
      // A missing or non-finite score (null, or NaN, presumably possible when
      // a vector has zero magnitude) counts as 0 so that it cannot make the
      // comparator below inconsistent.
      score: Number.isFinite(rawScore) ? rawScore : 0,
      doc: new Document(entry.doc),
    };
  });

  // A numeric difference yields 0 for ties, which a consistent comparator
  // requires; ties then keep insertion order because sort is stable.
  scoredDocs.sort((a, b) => b.score - a.score);

  return scoredDocs.slice(0, k).map((scored) => scored.doc);
}
/**
 * Registers the retriever `devLocalVectorstore/<indexName>` on `ai`.
 *
 * On each retrieval the registered handler reloads the index's file store,
 * embeds the query content, and returns the closest stored documents
 * (3 unless `options.k` says otherwise).
 *
 * @param {object} ai - Object exposing `defineRetriever` and `embed`.
 * @param {object} params
 * @param {string} params.indexName - Index to search.
 * @param {*} params.embedder - Passed through to `ai.embed`.
 * @param {*} [params.embedderOptions] - Passed through to `ai.embed` as `options`.
 * @returns {*} Whatever `ai.defineRetriever` returns.
 */
function configureDevLocalRetriever(ai, params) {
  const { embedder, embedderOptions } = params;
  const retrieverConfig = {
    name: `devLocalVectorstore/${params.indexName}`,
    configSchema: CommonRetrieverOptionsSchema,
  };

  return ai.defineRetriever(retrieverConfig, async (content, options) => {
    // The store is re-read on every call rather than cached.
    const db = loadFilestore(params.indexName);
    const queryResult = await ai.embed({
      embedder,
      content,
      options: embedderOptions,
    });

    const k = options?.k ?? 3;
    // Only the first embedding returned for the query is used.
    const queryEmbeddings = queryResult[0].embedding;
    const documents = await getClosestDocuments({ k, queryEmbeddings, db });
    return { documents };
  });
}
/**
 * Registers the indexer `devLocalVectorstore/<indexName>` on `ai`.
 *
 * The registered handler forwards the documents it receives, together with
 * the configured index name and embedder settings, to
 * importDocumentsToLocalVectorstore.
 *
 * @param {object} ai - Object exposing `defineIndexer`; also passed on to the
 *   import routine.
 * @param {object} params
 * @param {string} params.indexName - Index to write to.
 * @param {*} params.embedder - Forwarded to the import routine.
 * @param {*} [params.embedderOptions] - Forwarded to the import routine.
 * @returns {*} Whatever `ai.defineIndexer` returns.
 */
function configureDevLocalIndexer(ai, params) {
  const { embedder, embedderOptions } = params;
  const indexerConfig = { name: `devLocalVectorstore/${params.indexName}` };

  return ai.defineIndexer(indexerConfig, async (docs) => {
    const importRequest = {
      indexName: params.indexName,
      docs,
      embedder,
      embedderOptions,
    };
    await importDocumentsToLocalVectorstore(ai, importRequest);
  });
}
export {
index_default as default,
devLocalIndexerRef,
devLocalRetrieverRef,
devLocalVectorstore
};
//# sourceMappingURL=index.mjs.map