/*
 * @genkit-ai/dev-local-vectorstore
 * Version:
 * Genkit AI framework plugin for temporary local vector database.
 * 165 lines • 5.84 kB
 * JavaScript
 */
;
// Short aliases for the Object built-ins used by the module-interop helpers
// (__export, __copyProps, __toESM, __toCommonJS) defined in this file.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
/**
 * Defines one enumerable getter on `target` for every enumerable key of `all`.
 *
 * @param {object} target - Object that receives the accessor properties.
 * @param {Object<string, Function>} all - Map of export name to getter function.
 */
var __export = (target, all) => {
  for (const exportName in all) {
    const getter = all[exportName];
    __defProp(target, exportName, { get: getter, enumerable: true });
  }
};
/**
 * Mirrors the own properties of `from` onto `to` as live getters.
 *
 * Keys that `to` already owns, and the key equal to `except`, are skipped.
 * Each new getter is enumerable when the source property is enumerable (or
 * when no descriptor can be obtained for it).
 *
 * @param {object} to - Destination object; returned after copying.
 * @param {*} from - Source; anything that is not an object or function is ignored.
 * @param {string} [except] - Single key to leave out.
 * @param {PropertyDescriptor} [desc] - Unused by callers; kept for signature compatibility.
 * @returns {object} The `to` object.
 */
var __copyProps = (to, from, except, desc) => {
  const isObject = typeof from === "object" && Boolean(from);
  if (!isObject && typeof from !== "function") {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    const sourceDesc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !sourceDesc || sourceDesc.enumerable
    });
  }
  return to;
};
/**
 * Wraps a required module in an ESM-style namespace object.
 *
 * The wrapper inherits from the module's prototype and re-exposes the
 * module's own properties through `__copyProps`.
 *
 * @param {*} mod - Value returned by `require`.
 * @param {boolean} [isNodeMode] - Forces `default` to be the module itself.
 * @param {object} [target] - Ignored on input; always overwritten.
 * @returns {object} The namespace wrapper.
 */
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  if (needsDefault) {
    __defProp(target, "default", { value: mod, enumerable: true });
  }
  return __copyProps(target, mod);
};
/**
 * Builds a CommonJS exports object from an export table: a fresh object
 * flagged with a non-enumerable `__esModule: true`, onto which the
 * properties of `mod` are mirrored via `__copyProps`.
 *
 * @param {object} mod - Export table to publish.
 * @returns {object} The object to assign to `module.exports`.
 */
var __toCommonJS = (mod) => {
  const cjsExports = __defProp({}, "__esModule", { value: true });
  return __copyProps(cjsExports, mod);
};
// Export table for this module. Every entry is registered as a getter, so the
// exported binding is looked up when the property is read, not when it is
// registered here.
var index_exports = {};
__export(index_exports, {
default: () => index_default,
devLocalIndexerRef: () => devLocalIndexerRef,
devLocalRetrieverRef: () => devLocalRetrieverRef,
devLocalVectorstore: () => devLocalVectorstore
});
// Publish the table as the CommonJS exports (flagged with `__esModule`).
module.exports = __toCommonJS(index_exports);
var import_compute_cosine_similarity = __toESM(require("compute-cosine-similarity"));
var fs = __toESM(require("fs"));
var import_genkit = require("genkit");
var import_plugin = require("genkit/plugin");
var import_retriever = require("genkit/retriever");
var import_ts_md5 = require("ts-md5");
// File-name template for an index's on-disk store; "{INDEX_NAME}" is replaced
// with the index name. The path is relative, so it resolves against the
// process working directory.
const _LOCAL_FILESTORE = "__db_{INDEX_NAME}.json";

/**
 * Loads the persisted contents of a local index.
 *
 * NOTE: the name is substituted with `String.prototype.replace`, so `$`
 * replacement patterns inside `indexName` are interpreted rather than copied
 * verbatim. The writer builds its path the same way, so both stay in sync.
 *
 * @param {string} indexName - Name of the index whose store should be read.
 * @returns {object} Parsed JSON contents, or a new empty object when the file
 *   does not exist.
 */
function loadFilestore(indexName) {
  const storePath = _LOCAL_FILESTORE.replace("{INDEX_NAME}", indexName);
  if (!fs.existsSync(storePath)) {
    return {};
  }
  const raw = fs.readFileSync(storePath).toString();
  return JSON.parse(raw);
}
/**
 * Inserts a document and its embedding into an in-memory store, unless an
 * entry with the same id is already there.
 *
 * The id is the `Md5.hashStr` digest of the document's JSON serialization, so
 * documents that serialize identically share one entry.
 *
 * @param {*} embedding - Embedding to store next to the document.
 * @param {*} doc - Document to store; must be JSON-serializable.
 * @param {object} contents - Store keyed by document id; mutated in place.
 */
function addDocument(embedding, doc, contents) {
  const docId = import_ts_md5.Md5.hashStr(JSON.stringify(doc));
  if (docId in contents) {
    console.debug(`Skipping ${docId} since it is already present`);
    return;
  }
  contents[docId] = { doc, embedding };
}
/**
 * Creates the "devLocalVectorstore" Genkit plugin.
 *
 * When the plugin initializes, a retriever is registered for every entry of
 * `params`, and after that an indexer is registered for every entry.
 *
 * @param {Array<object>} params - One configuration object per local index.
 * @returns {*} Whatever `genkitPlugin` returns for this plugin definition.
 */
function devLocalVectorstore(params) {
  const { genkitPlugin } = import_plugin;
  return genkitPlugin("devLocalVectorstore", async (ai) => {
    params.forEach((indexConfig) => {
      configureDevLocalRetriever(ai, indexConfig);
    });
    params.forEach((indexConfig) => {
      configureDevLocalIndexer(ai, indexConfig);
    });
  });
}
var index_default = devLocalVectorstore;
/**
 * Builds a reference to the retriever registered for a local index.
 *
 * @param {string} indexName - Name of the index.
 * @returns {*} Result of `retrieverRef` for the name
 *   `devLocalVectorstore/<indexName>`, with the optional common retriever
 *   options schema as its config schema.
 */
function devLocalRetrieverRef(indexName) {
  const { retrieverRef } = import_retriever;
  const name = `devLocalVectorstore/${indexName}`;
  const label = `Local file-based Retriever - ${indexName}`;
  return retrieverRef({
    name,
    info: { label },
    configSchema: import_retriever.CommonRetrieverOptionsSchema.optional()
  });
}
/**
 * Builds a reference to the indexer registered for a local index.
 *
 * @param {string} indexName - Name of the index.
 * @returns {*} Result of `indexerRef` for the name
 *   `devLocalVectorstore/<indexName>`, with an optional-null config schema.
 */
function devLocalIndexerRef(indexName) {
  const { indexerRef } = import_retriever;
  const name = `devLocalVectorstore/${indexName}`;
  const label = `Local file-based Indexer - ${indexName}`;
  return indexerRef({
    name,
    info: { label },
    configSchema: import_genkit.z.null().optional()
  });
}
/**
 * Embeds the given documents and persists them in the index's local file store.
 *
 * Documents are embedded concurrently, but they are added to the store in the
 * order they appear in `params.docs`. The key order of the written file, and
 * which of several identical documents is kept, is therefore deterministic
 * and does not depend on which embed call finishes first.
 *
 * @param {object} ai - Genkit instance; only `ai.embed` is used.
 * @param {object} params
 * @param {string} params.indexName - Index whose store is read and rewritten.
 * @param {Array<object>} params.docs - Documents to import; each must provide
 *   `getEmbeddingDocuments(embeddings)`.
 * @param {*} params.embedder - Embedder passed through to `ai.embed`.
 * @param {*} [params.embedderOptions] - Options passed through to `ai.embed`.
 * @returns {Promise<void>} Resolves once the store file has been written. If
 *   any embed call rejects, the promise rejects and nothing is written.
 */
async function importDocumentsToLocalVectorstore(ai, params) {
  const { indexName, docs, embedder, embedderOptions } = params;
  const data = loadFilestore(indexName);

  // Promise.all keeps the results in input order regardless of completion order.
  const embeddedDocs = await Promise.all(
    docs.map(async (doc) => {
      const embeddings = await ai.embed({
        embedder,
        content: doc,
        options: embedderOptions
      });
      return { embeddings, embeddingDocs: doc.getEmbeddingDocuments(embeddings) };
    })
  );

  for (const { embeddings, embeddingDocs } of embeddedDocs) {
    // Iterate by numeric index: `for...in` would also visit inherited
    // enumerable keys and is not meant for arrays.
    for (const [i, embeddingDoc] of embeddingDocs.entries()) {
      addDocument(embeddings[i], embeddingDoc, data);
    }
  }

  fs.writeFileSync(
    _LOCAL_FILESTORE.replace("{INDEX_NAME}", indexName),
    JSON.stringify(data, null, 2)
  );
}
/**
 * Returns the stored documents most similar to a query embedding.
 *
 * Every entry of `params.db` is scored by cosine similarity against the
 * query, and the entries are returned in descending score order. Entries with
 * equal scores keep the order they have in `params.db`.
 *
 * @param {object} params
 * @param {object} params.db - Store whose values look like
 *   `{ doc, embedding: { embedding: number[] } }`.
 * @param {number[]} params.queryEmbeddings - Embedding vector of the query.
 * @param {number} [params.k] - Maximum number of documents to return; all
 *   documents are returned when omitted.
 * @returns {Promise<Array>} Up to `k` `Document` instances, best match first.
 */
async function getClosestDocuments(params) {
  const { db, queryEmbeddings, k } = params;
  const scoredDocs = Object.values(db).map((entry) => {
    const rawScore = (0, import_compute_cosine_similarity.default)(
      queryEmbeddings,
      entry.embedding.embedding
    );
    return {
      // A missing (null) or non-finite (NaN, e.g. zero-norm vector) similarity
      // counts as 0, so every score is an ordinary number the sort can order.
      score: Number.isFinite(rawScore) ? rawScore : 0,
      doc: new import_retriever.Document(entry.doc)
    };
  });
  // Numeric comparator that returns 0 for ties, as Array.prototype.sort requires.
  scoredDocs.sort((a, b) => b.score - a.score);
  return scoredDocs.slice(0, k).map((scored) => scored.doc);
}
/**
 * Registers the retriever `devLocalVectorstore/<indexName>` on the Genkit
 * instance.
 *
 * On every query the retriever reloads the index's file store, embeds the
 * query content and returns the closest stored documents (3 unless
 * `options.k` is given).
 *
 * @param {object} ai - Genkit instance (`defineRetriever` and `embed` are used).
 * @param {object} params - Index configuration: `indexName`, `embedder`, and
 *   optional `embedderOptions`.
 * @returns {*} Whatever `ai.defineRetriever` returns.
 */
function configureDevLocalRetriever(ai, params) {
  const { embedder, embedderOptions } = params;
  return ai.defineRetriever(
    {
      name: `devLocalVectorstore/${params.indexName}`,
      configSchema: import_retriever.CommonRetrieverOptionsSchema
    },
    async (content, options) => {
      // The store is re-read per query, so newly indexed documents are picked up.
      const db = loadFilestore(params.indexName);
      const embeddings = await ai.embed({ embedder, content, options: embedderOptions });
      const k = options?.k ?? 3;
      const queryEmbeddings = embeddings[0].embedding;
      const documents = await getClosestDocuments({ k, queryEmbeddings, db });
      return { documents };
    }
  );
}
/**
 * Registers the indexer `devLocalVectorstore/<indexName>` on the Genkit
 * instance. The indexer hands incoming documents to
 * `importDocumentsToLocalVectorstore` together with the configured embedder.
 *
 * @param {object} ai - Genkit instance (`defineIndexer` is used).
 * @param {object} params - Index configuration: `indexName`, `embedder`, and
 *   optional `embedderOptions`.
 * @returns {*} Whatever `ai.defineIndexer` returns.
 */
function configureDevLocalIndexer(ai, params) {
  const { embedder, embedderOptions } = params;
  return ai.defineIndexer(
    { name: `devLocalVectorstore/${params.indexName}` },
    async (docs) => {
      const request = {
        indexName: params.indexName,
        docs,
        embedder,
        embedderOptions
      };
      await importDocumentsToLocalVectorstore(ai, request);
    }
  );
}
// Annotate the CommonJS export names for ESM import in node:
// The `0 &&` guard short-circuits, so this assignment never runs; the
// statement only spells out the export names in the source text.
0 && (module.exports = {
devLocalIndexerRef,
devLocalRetrieverRef,
devLocalVectorstore
});
//# sourceMappingURL=index.js.map