genkitx-chromadb
Version:
Genkit AI framework plugin for Chroma vector database.
267 lines • 8.75 kB
JavaScript
;
// Short aliases for the Object / Object.prototype built-ins used by the
// module-interop helpers (__export, __copyProps, __toCommonJS) in this file.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
/**
 * Defines one enumerable, getter-backed property on `target` for every
 * enumerable key of `all`.
 *
 * @param {object} target Object that receives the accessor properties.
 * @param {object} all Map of property name -> zero-argument getter function.
 */
var __export = (target, all) => {
  for (const exportName in all) {
    const getter = all[exportName];
    __defProp(target, exportName, { enumerable: true, get: getter });
  }
};
/**
 * Mirrors the own properties of `from` onto `to` as getter-backed properties
 * and returns `to`.
 *
 * Keys that `to` already owns, and the key equal to `except`, are skipped.
 * Each new property reads through to `from[key]` at access time.
 *
 * @param to Object that receives the forwarding properties.
 * @param from Source; only processed when it is a non-null object or a function.
 * @param except Optional key to leave out.
 * @param desc Scratch variable for the source descriptor (callers here omit it).
 */
var __copyProps = (to, from, except, desc) => {
// `&&` binds tighter than `||`: (truthy object) OR (any function).
if (from && typeof from === "object" || typeof from === "function") {
for (let key of __getOwnPropNames(from))
if (!__hasOwnProp.call(to, key) && key !== except)
// Enumerability follows the source descriptor; it is treated as enumerable
// when no descriptor can be found.
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
}
return to;
};
// Builds a fresh object carrying `__esModule: true` and forwards every own
// property of `mod` onto it through __copyProps.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
// Public export table. Every name is registered as a getter, so the bindings
// declared later in this file are only read when the export is accessed.
var src_exports = {};
__export(src_exports, {
ChromaIndexerOptionsSchema: () => ChromaIndexerOptionsSchema,
// Re-exported from the "chromadb" package.
IncludeEnum: () => import_chromadb.IncludeEnum,
chroma: () => chroma,
chromaIndexer: () => chromaIndexer,
chromaIndexerRef: () => chromaIndexerRef,
chromaRetriever: () => chromaRetriever,
chromaRetrieverRef: () => chromaRetrieverRef,
createChromaCollection: () => createChromaCollection,
deleteChromaCollection: () => deleteChromaCollection
});
// Publish the export table as this module's CommonJS exports.
module.exports = __toCommonJS(src_exports);
var import_chromadb = require("chromadb");
var import_genkit = require("genkit");
var import_plugin = require("genkit/plugin");
var import_retriever = require("genkit/retriever");
var import_ts_md5 = require("ts-md5");
// `where` / `whereDocument` filters are not validated here: any value is accepted.
const WhereSchema = import_genkit.z.any();
const WhereDocumentSchema = import_genkit.z.any();
// Optional list of result fields to request from the query.
const IncludeOptionSchema = import_genkit.z.array(import_genkit.z.enum(["documents", "embeddings", "metadatas", "distances"])).optional();
// Retriever options: the common Genkit retriever options plus the three
// optional fields below (all forwarded to the collection query by chromaRetriever).
const ChromaRetrieverOptionsSchema = import_retriever.CommonRetrieverOptionsSchema.extend({
include: IncludeOptionSchema,
where: WhereSchema.optional(),
whereDocument: WhereDocumentSchema.optional()
});
// Indexer options: only `null` or `undefined` validate, i.e. there are no
// configurable indexer options.
const ChromaIndexerOptionsSchema = import_genkit.z.null().optional();
/**
 * Creates the "chroma" Genkit plugin.
 *
 * When the plugin initializes, a retriever is registered for every entry of
 * `params`, followed by an indexer for every entry.
 *
 * @param {Array<object>} params Per-collection configurations; each entry is
 *   passed unchanged to `chromaRetriever` and `chromaIndexer`.
 * @returns The value returned by `genkitPlugin`.
 */
function chroma(params) {
  const { genkitPlugin } = import_plugin;
  const registerAll = async (ai) => {
    // All retrievers are registered before any indexer.
    params.forEach((config) => {
      chromaRetriever(ai, config);
    });
    params.forEach((config) => {
      chromaIndexer(ai, config);
    });
  };
  return genkitPlugin("chroma", registerAll);
}
/**
 * Builds a reference to the retriever registered as `chroma/<collectionName>`.
 *
 * @param {object} params
 * @param {string} params.collectionName Collection name used in the ref name.
 * @param {string} [params.displayName] Label override; when null or undefined
 *   the label falls back to `Chroma DB - <collectionName>`.
 * @returns The value returned by `retrieverRef`.
 */
const chromaRetrieverRef = (params) => {
  const { collectionName, displayName } = params;
  const { retrieverRef } = import_genkit;
  const label = displayName ?? `Chroma DB - ${collectionName}`;
  return retrieverRef({
    name: `chroma/${collectionName}`,
    info: { label },
    configSchema: ChromaRetrieverOptionsSchema.optional()
  });
};
/**
 * Builds a reference to the indexer registered as `chroma/<collectionName>`.
 *
 * @param {object} params
 * @param {string} params.collectionName Collection name used in the ref name.
 * @param {string} [params.displayName] Label override; when null or undefined
 *   the label falls back to `Chroma DB - <collectionName>`.
 * @returns The value returned by `indexerRef`.
 */
const chromaIndexerRef = (params) => {
  const { collectionName, displayName } = params;
  const { indexerRef } = import_genkit;
  const label = displayName ?? `Chroma DB - ${collectionName}`;
  return indexerRef({
    name: `chroma/${collectionName}`,
    info: { label },
    configSchema: ChromaIndexerOptionsSchema.optional()
  });
};
/**
 * Registers a retriever named `chroma/<collectionName>` on `ai`.
 *
 * On each retrieval the handler:
 *   1. resolves `params.clientParams` and creates a new ChromaClient,
 *   2. opens the collection (creating it first when
 *      `params.createCollectionIfMissing` is truthy),
 *   3. embeds the query content with `ai.embed`,
 *   4. queries the collection with the first returned embedding,
 *   5. rebuilds a Genkit Document from every non-null hit, using the
 *      `dataType` / `docMetadata` fields of that hit's stored metadata
 *      (these are the fields chromaIndexer in this file writes).
 *
 * @param ai Genkit instance providing `defineRetriever` and `embed`.
 * @param {object} params
 * @param params.embedder Embedder passed to `ai.embed`.
 * @param {string} params.collectionName Chroma collection to query.
 * @param [params.embedderOptions] Options passed to `ai.embed`.
 * @param [params.clientParams] Client parameters, or a function producing them.
 * @param {boolean} [params.createCollectionIfMissing]
 * @returns The value returned by `ai.defineRetriever`.
 */
function chromaRetriever(ai, params) {
  const { embedder, collectionName, embedderOptions } = params;
  return ai.defineRetriever(
    {
      name: `chroma/${collectionName}`,
      configSchema: ChromaRetrieverOptionsSchema.optional()
    },
    async (content, options) => {
      const clientParams = await resolve(params.clientParams);
      const client = new import_chromadb.ChromaClient(clientParams);
      const collection = params.createCollectionIfMissing
        ? await client.getOrCreateCollection({ name: collectionName })
        : await client.getCollection({ name: collectionName });
      const queryEmbeddings = await ai.embed({
        embedder,
        content,
        options: embedderOptions
      });
      const results = await collection.query({
        nResults: options?.k,
        include: getIncludes(options?.include),
        where: options?.where,
        whereDocument: options?.whereDocument,
        queryEmbeddings: queryEmbeddings[0].embedding
      });
      // Query results are nested one row per query embedding. A single
      // embedding is sent, so row 0 of EVERY field belongs to this query.
      // Previously only `documents` was narrowed to row 0, which made every
      // hit after the first look up a non-existent metadata row and throw.
      const documents = results.documents[0];
      const metadatas = results.metadatas?.[0];
      const embeddings = results.embeddings?.[0];
      const distances = results.distances?.[0];
      const combined = documents
        .map((d, i) => {
          if (d === null) {
            return void 0;
          }
          return {
            document: d,
            // Index `i` is the hit's position in the row, taken before null
            // documents are filtered out so the fields stay aligned.
            metadata: constructMetadata(i, metadatas, embeddings, distances)
          };
        })
        .filter((r) => !!r);
      return {
        documents: combined.map((result) => {
          const data = result.document;
          const metadata = result.metadata.metadata;
          const dataType = metadata.dataType;
          // docMetadata is stored as a JSON string; absent/empty means none.
          const docMetadata = metadata.docMetadata ? JSON.parse(metadata.docMetadata) : void 0;
          return import_genkit.Document.fromData(data, dataType, docMetadata).toJSON();
        })
      };
    }
  );
}
/**
 * Computes the `include` list sent with a collection query.
 *
 * When the caller supplies no list, `undefined` is returned so the client's
 * own default applies. Otherwise "documents" and "metadatas" are always added:
 * the retriever in this file reads both the document text and the stored
 * metadata (`dataType`, `docMetadata`) of every hit, so a caller-supplied list
 * that omitted "metadatas" previously made the retriever throw.
 *
 * @param {string[]|undefined} includes Caller-requested fields.
 * @returns {string[]|undefined} De-duplicated list, required fields first.
 */
function getIncludes(includes) {
  if (!includes) {
    return void 0;
  }
  const { Documents, Metadatas } = import_chromadb.IncludeEnum;
  const effectiveIncludes = [Documents, Metadatas].concat(includes);
  // A Set removes duplicates while keeping first-seen order.
  return Array.from(new Set(effectiveIncludes));
}
/**
 * Collects the per-result fields found at index `i` into one object.
 *
 * - `metadata` is set only when `metadatas[i]` is truthy.
 * - `embedding` is set whenever `embeddings` is truthy (the value may be undefined).
 * - `distances` is set whenever `distances` is truthy (the value may be undefined).
 *
 * @param {number} i Index to read from each list.
 * @param metadatas Indexable list of metadata values, or a falsy value.
 * @param embeddings Indexable list of embeddings, or a falsy value.
 * @param distances Indexable list of distances, or a falsy value.
 * @returns {object} Object holding whichever of the three fields applied.
 */
function constructMetadata(i, metadatas, embeddings, distances) {
  const collected = {};
  const metadataAtIndex = metadatas ? metadatas[i] : void 0;
  if (metadataAtIndex) {
    collected.metadata = metadataAtIndex;
  }
  if (embeddings) {
    collected.embedding = embeddings[i];
  }
  if (distances) {
    collected.distances = distances[i];
  }
  return collected;
}
/**
 * Registers an indexer named `chroma/<collectionName>` on `ai`.
 *
 * For each indexing call the handler resolves the client parameters, opens
 * the collection (creating it when `params.createCollectionIfMissing` is
 * truthy), embeds every document with `ai.embed`, and adds one record per
 * returned embedding to the collection in a single `add` call.
 *
 * Each record carries:
 *   - id:        Md5 hash of the JSON-serialized embedding document,
 *   - embedding: the embedding vector,
 *   - document:  the embedding document's `data`,
 *   - metadata:  `{ docMetadata: <JSON string>, dataType: <string or ""> }`.
 *
 * @param ai Genkit instance providing `defineIndexer` and `embed`.
 * @param {object} params
 * @param {string} params.collectionName Chroma collection to write to.
 * @param params.embedder Embedder passed to `ai.embed`.
 * @param [params.embedderOptions] Options passed to `ai.embed`.
 * @param [params.clientParams] Client parameters, or a function producing them.
 * @param {boolean} [params.createCollectionIfMissing]
 * @returns The value returned by `ai.defineIndexer`.
 */
function chromaIndexer(ai, params) {
  const { collectionName, embedder, embedderOptions } = params;

  const indexDocuments = async (docs) => {
    const clientParams = await resolve(params.clientParams);
    const client = new import_chromadb.ChromaClient(clientParams);
    const collection = params.createCollectionIfMissing
      ? await client.getOrCreateCollection({ name: collectionName })
      : await client.getCollection({ name: collectionName });

    // Embed all documents concurrently; result order matches `docs`.
    const embeddingsPerDoc = await Promise.all(
      docs.map((doc) => ai.embed({ embedder, content: doc, options: embedderOptions }))
    );

    // One document may yield several embeddings; emit one flat record per embedding.
    const entries = embeddingsPerDoc.flatMap((docEmbeddings, docIndex) => {
      const embeddingDocs = docs[docIndex].getEmbeddingDocuments(docEmbeddings);
      return docEmbeddings.map((docEmbedding, j) => {
        const part = embeddingDocs[j];
        const metadata = {
          docMetadata: JSON.stringify(part.metadata),
          dataType: part.dataType || ""
        };
        const document = part.data;
        const id = import_ts_md5.Md5.hashStr(JSON.stringify(part));
        return { id, value: docEmbedding.embedding, document, metadata };
      });
    });

    await collection.add({
      ids: entries.map(({ id }) => id),
      embeddings: entries.map(({ value }) => value),
      metadatas: entries.map(({ metadata }) => metadata),
      documents: entries.map(({ document }) => document)
    });
  };

  return ai.defineIndexer(
    {
      name: `chroma/${params.collectionName}`,
      configSchema: ChromaIndexerOptionsSchema
    },
    indexDocuments
  );
}
/**
 * Creates a Chroma collection.
 *
 * All fields of `params` are forwarded to `client.createCollection`. When
 * `params.embedder` is truthy, an embedding function backed by `ai.embed` is
 * supplied as `embeddingFunction`; otherwise `embeddingFunction` is undefined.
 *
 * @param ai Genkit instance providing `embed`.
 * @param {object} params Collection parameters.
 * @param [params.embedder] Embedder used to build the embedding function.
 * @param [params.embedderOptions] Options passed to `ai.embed`.
 * @param [params.clientParams] Client parameters, or a function producing them.
 * @returns {Promise<*>} The value resolved by `client.createCollection`.
 */
async function createChromaCollection(ai, params) {
  const { embedder } = params;

  // Embeds a single text; options are read from `params` at call time.
  const embedText = (text) => ai.embed({
    embedder,
    content: text,
    options: params.embedderOptions
  });

  const chromaEmbedder = embedder
    ? {
        generate(texts) {
          const pending = texts.map((text) => embedText(text));
          // Each embed call resolves to a list; only its first embedding is kept.
          return Promise.all(pending).then((results) => results.map((result) => result[0].embedding));
        }
      }
    : void 0;

  const clientParams = await resolve(params.clientParams);
  const client = new import_chromadb.ChromaClient(clientParams);
  const created = await client.createCollection({
    ...params,
    embeddingFunction: chromaEmbedder
  });
  return created;
}
/**
 * Deletes a Chroma collection.
 *
 * A shallow copy of `params` (including `clientParams`) is passed to
 * `client.deleteCollection`.
 *
 * @param {object} params Deletion parameters.
 * @param [params.clientParams] Client parameters, or a function producing them.
 * @returns {Promise<*>} The value resolved by `client.deleteCollection`.
 */
async function deleteChromaCollection(params) {
  const resolvedClientParams = await resolve(params.clientParams);
  const client = new import_chromadb.ChromaClient(resolvedClientParams);
  const request = { ...params };
  const outcome = await client.deleteCollection(request);
  return outcome;
}
/**
 * Normalizes a value that may be given directly or through a factory.
 *
 * - A function is called with no arguments and its (awaited) result is returned.
 * - Any other truthy value is returned unchanged.
 * - Any falsy value yields `undefined`.
 *
 * @param {*} params Value, or (possibly async) function producing the value.
 * @returns {Promise<*>} The resolved value.
 */
async function resolve(params) {
  if (typeof params === "function") {
    const produced = await params();
    return produced;
  }
  return params || void 0;
}
// Annotate the CommonJS export names for ESM import in node.
// The `0 &&` guard short-circuits, so this assignment never executes; the
// statement is present only so the export names appear literally in the file.
0 && (module.exports = {
ChromaIndexerOptionsSchema,
IncludeEnum,
chroma,
chromaIndexer,
chromaIndexerRef,
chromaRetriever,
chromaRetrieverRef,
createChromaCollection,
deleteChromaCollection
});
//# sourceMappingURL=index.js.map