genkitx-chromadb
Version:
Genkit AI framework plugin for Chroma vector database.
243 lines • 7.16 kB
JavaScript
import {
ChromaClient,
IncludeEnum
} from "chromadb";
import {
Document,
indexerRef,
retrieverRef,
z
} from "genkit";
import { genkitPlugin } from "genkit/plugin";
import { CommonRetrieverOptionsSchema } from "genkit/retriever";
import { Md5 } from "ts-md5";
// Chroma `where` / `whereDocument` filters are accepted as-is: any value validates.
const WhereSchema = z.any();
const WhereDocumentSchema = z.any();

// Names of the result sections a caller may ask the query to return.
const INCLUDE_FIELD_NAMES = ["documents", "embeddings", "metadatas", "distances"];
const IncludeOptionSchema = z.array(z.enum(INCLUDE_FIELD_NAMES)).optional();

// Retriever options: the common Genkit retriever options plus the Chroma-specific
// `include`, `where` and `whereDocument` fields (all optional).
const ChromaRetrieverOptionsSchema = CommonRetrieverOptionsSchema.extend({
  include: IncludeOptionSchema,
  where: WhereSchema.optional(),
  whereDocument: WhereDocumentSchema.optional(),
});

// The indexer takes no options: only `null` or an absent value validates.
const ChromaIndexerOptionsSchema = z.null().optional();
/**
 * Builds the "chroma" Genkit plugin.
 *
 * When the plugin is initialised, one retriever and one indexer are registered
 * for every entry in `params`. All retrievers are registered first, then all
 * indexers, each in the order of `params`.
 *
 * @param {Array<object>} params - One configuration object per collection; each
 *   entry is handed unchanged to `chromaRetriever` and `chromaIndexer`.
 * @returns {*} The value returned by `genkitPlugin`.
 */
function chroma(params) {
  return genkitPlugin("chroma", async (ai) => {
    // Registration is a side effect, so iterate instead of building
    // throwaway arrays with `map`.
    for (const collectionParams of params) {
      chromaRetriever(ai, collectionParams);
    }
    for (const collectionParams of params) {
      chromaIndexer(ai, collectionParams);
    }
  });
}
/**
 * Creates a reference to the retriever registered as `chroma/<collectionName>`.
 *
 * @param {object} params
 * @param {string} params.collectionName - Collection name used in the reference name.
 * @param {string} [params.displayName] - Label to show; when null or undefined
 *   the label falls back to `Chroma DB - <collectionName>`.
 * @returns {*} The value returned by `retrieverRef`.
 */
const chromaRetrieverRef = (params) => {
  const { collectionName, displayName } = params;
  const label = displayName ?? `Chroma DB - ${collectionName}`;
  const reference = {
    name: `chroma/${collectionName}`,
    info: { label },
    configSchema: ChromaRetrieverOptionsSchema.optional(),
  };
  return retrieverRef(reference);
};
/**
 * Creates a reference to the indexer registered as `chroma/<collectionName>`.
 *
 * @param {object} params
 * @param {string} params.collectionName - Collection name used in the reference name.
 * @param {string} [params.displayName] - Label to show; when null or undefined
 *   the label falls back to `Chroma DB - <collectionName>`.
 * @returns {*} The value returned by `indexerRef`.
 */
const chromaIndexerRef = (params) => {
  const { collectionName, displayName } = params;
  const label = displayName ?? `Chroma DB - ${collectionName}`;
  const reference = {
    name: `chroma/${collectionName}`,
    info: { label },
    configSchema: ChromaIndexerOptionsSchema.optional(),
  };
  return indexerRef(reference);
};
/**
 * Registers a Genkit retriever named `chroma/<collectionName>` that embeds the
 * query content and runs a similarity query against a Chroma collection.
 *
 * @param {object} ai - Genkit instance; its `defineRetriever` and `embed`
 *   methods are used.
 * @param {object} params - Retriever configuration.
 * @param {string} params.collectionName - Chroma collection to query.
 * @param {*} params.embedder - Embedder passed to `ai.embed`.
 * @param {*} [params.embedderOptions] - Options passed to `ai.embed`.
 * @param {object|Function} [params.clientParams] - Chroma client parameters, or
 *   a function producing them; passed through `resolve`.
 * @param {boolean} [params.createCollectionIfMissing] - When truthy the
 *   collection is fetched with `getOrCreateCollection` instead of
 *   `getCollection`.
 * @returns {*} The value returned by `ai.defineRetriever`.
 */
function chromaRetriever(ai, params) {
  const { embedder, collectionName, embedderOptions } = params;
  return ai.defineRetriever(
    {
      name: `chroma/${collectionName}`,
      configSchema: ChromaRetrieverOptionsSchema.optional(),
    },
    async (content, options) => {
      const clientParams = await resolve(params.clientParams);
      const client = new ChromaClient(clientParams);
      const collection = params.createCollectionIfMissing
        ? await client.getOrCreateCollection({ name: collectionName })
        : await client.getCollection({ name: collectionName });

      const queryEmbeddings = await ai.embed({
        embedder,
        content,
        options: embedderOptions,
      });
      const results = await collection.query({
        nResults: options?.k,
        include: getIncludes(options?.include),
        where: options?.where,
        whereDocument: options?.whereDocument,
        queryEmbeddings: queryEmbeddings[0].embedding,
      });

      // A single query embedding is sent, so each result section holds exactly
      // one per-query list. Unwrap all of them the same way; the sections that
      // were not requested may be absent, hence the optional access.
      const documents = results.documents[0];
      const metadatas = results.metadatas?.[0];
      const embeddings = results.embeddings?.[0];
      const distances = results.distances?.[0];

      // Pair each non-null document with the values at the SAME position in
      // the other lists. This must happen before filtering so indices line up.
      const combined = documents
        .map((d, i) => {
          if (d === null) {
            return void 0;
          }
          return {
            document: d,
            metadata: constructMetadata(i, metadatas, embeddings, distances),
          };
        })
        .filter((r) => !!r);

      return {
        documents: combined.map((result) => {
          // The stored metadata carries the `dataType` and JSON-encoded
          // `docMetadata` fields written by the indexer. A hit without stored
          // metadata makes this lookup throw.
          const stored = result.metadata.metadata;
          const docMetadata = stored.docMetadata
            ? JSON.parse(stored.docMetadata)
            : void 0;
          return Document.fromData(
            result.document,
            stored.dataType,
            docMetadata
          ).toJSON();
        }),
      };
    }
  );
}
/**
 * Computes the `include` list sent with a Chroma query.
 *
 * The retriever rebuilds every returned document from its text and from the
 * `dataType` / `docMetadata` fields stored in its metadata. Both sections are
 * therefore always requested, whatever the caller asked for.
 *
 * @param {string[]|undefined} includes - Sections requested by the caller.
 * @returns {string[]|undefined} `undefined` when `includes` is falsy (the query
 *   is then sent without an explicit include list); otherwise the requested
 *   sections with documents and metadatas first and duplicates removed.
 */
function getIncludes(includes) {
  if (!includes) {
    return void 0;
  }
  const required = [IncludeEnum.Documents, IncludeEnum.Metadatas];
  // A Set keeps first-insertion order, so required sections come first.
  return [...new Set([...required, ...includes])];
}
/**
 * Collects the values found at position `i` of the given lists into one object.
 *
 * @param {number} i - Position to read from each list.
 * @param {Array|null|undefined} metadatas - Copied to `metadata` only when the
 *   list and its entry at `i` are both truthy.
 * @param {Array|null|undefined} embeddings - When truthy, entry `i` is copied
 *   to `embedding` (even if that entry is undefined).
 * @param {Array|null|undefined} distances - When truthy, entry `i` is copied
 *   to `distances` (even if that entry is undefined).
 * @returns {object} Object holding whichever of `metadata`, `embedding` and
 *   `distances` were available.
 */
function constructMetadata(i, metadatas, embeddings, distances) {
  const assembled = {};
  const metadataEntry = metadatas ? metadatas[i] : void 0;
  if (metadataEntry) {
    assembled.metadata = metadataEntry;
  }
  if (embeddings) {
    assembled.embedding = embeddings[i];
  }
  if (distances) {
    assembled.distances = distances[i];
  }
  return assembled;
}
/**
 * Registers a Genkit indexer named `chroma/<collectionName>` that embeds
 * documents and adds them to a Chroma collection.
 *
 * Each stored entry uses the MD5 hash of the JSON-serialised embedding
 * document as its id. Its metadata holds the JSON-encoded document metadata
 * (`docMetadata`) and the document's `dataType` (empty string when falsy).
 *
 * @param {object} ai - Genkit instance; its `defineIndexer` and `embed`
 *   methods are used.
 * @param {object} params - Indexer configuration.
 * @param {string} params.collectionName - Chroma collection to write to.
 * @param {*} params.embedder - Embedder passed to `ai.embed`.
 * @param {*} [params.embedderOptions] - Options passed to `ai.embed`.
 * @param {object|Function} [params.clientParams] - Chroma client parameters, or
 *   a function producing them; passed through `resolve`.
 * @param {boolean} [params.createCollectionIfMissing] - When truthy the
 *   collection is fetched with `getOrCreateCollection` instead of
 *   `getCollection`.
 * @returns {*} The value returned by `ai.defineIndexer`.
 */
function chromaIndexer(ai, params) {
  const { collectionName, embedder, embedderOptions } = params;
  return ai.defineIndexer(
    {
      name: `chroma/${collectionName}`,
      configSchema: ChromaIndexerOptionsSchema,
    },
    async (docs) => {
      const clientParams = await resolve(params.clientParams);
      const client = new ChromaClient(clientParams);
      const collection = params.createCollectionIfMissing
        ? await client.getOrCreateCollection({ name: collectionName })
        : await client.getCollection({ name: collectionName });

      // Embed all documents concurrently; result i belongs to docs[i].
      const embeddings = await Promise.all(
        docs.map((doc) =>
          ai.embed({
            embedder,
            content: doc,
            options: embedderOptions,
          })
        )
      );

      // One document can yield several embeddings; flatten them into one
      // list of entries, keeping document order.
      const entries = embeddings.flatMap((docEmbeddings, i) => {
        const embeddingDocs = docs[i].getEmbeddingDocuments(docEmbeddings);
        return docEmbeddings.map((docEmbedding, j) => {
          const embeddingDoc = embeddingDocs[j];
          return {
            id: Md5.hashStr(JSON.stringify(embeddingDoc)),
            value: docEmbedding.embedding,
            document: embeddingDoc.data,
            metadata: {
              docMetadata: JSON.stringify(embeddingDoc.metadata),
              dataType: embeddingDoc.dataType || "",
            },
          };
        });
      });

      // Nothing to store: do not send an empty add request.
      if (entries.length === 0) {
        return;
      }

      await collection.add({
        ids: entries.map((e) => e.id),
        embeddings: entries.map((e) => e.value),
        metadatas: entries.map((e) => e.metadata),
        documents: entries.map((e) => e.document),
      });
    }
  );
}
/**
 * Creates a Chroma collection.
 *
 * When `params.embedder` is truthy, the collection is created with an
 * embedding function whose `generate(texts)` embeds every text through
 * `ai.embed` and resolves to the first embedding of each result.
 *
 * @param {object} ai - Genkit instance; its `embed` method is used.
 * @param {object} params - Spread into the `createCollection` request.
 * @param {*} [params.embedder] - Embedder passed to `ai.embed`.
 * @param {*} [params.embedderOptions] - Options passed to `ai.embed`.
 * @param {object|Function} [params.clientParams] - Chroma client parameters, or
 *   a function producing them; passed through `resolve`.
 * @returns {Promise<*>} The value `client.createCollection` resolves to.
 */
async function createChromaCollection(ai, params) {
  const { embedder } = params;

  // `embedderOptions` is read at call time, not when the collection is created.
  const embedText = (text) =>
    ai.embed({
      embedder,
      content: text,
      options: params.embedderOptions,
    });

  const chromaEmbedder = embedder
    ? {
        generate: (texts) =>
          Promise.all(texts.map((text) => embedText(text))).then((results) =>
            results.map((result) => result[0].embedding)
          ),
      }
    : void 0;

  const client = new ChromaClient(await resolve(params.clientParams));
  const request = { ...params, embeddingFunction: chromaEmbedder };
  const created = await client.createCollection(request);
  return created;
}
/**
 * Deletes a Chroma collection.
 *
 * @param {object} params - A shallow copy of this object is sent as the
 *   `deleteCollection` request.
 * @param {object|Function} [params.clientParams] - Chroma client parameters, or
 *   a function producing them; passed through `resolve`.
 * @returns {Promise<*>} The value `client.deleteCollection` resolves to.
 */
async function deleteChromaCollection(params) {
  const client = new ChromaClient(await resolve(params.clientParams));
  const request = { ...params };
  const outcome = await client.deleteCollection(request);
  return outcome;
}
/**
 * Normalises a value that may be given directly or as a factory function.
 *
 * @param {*} params - A value, or a (possibly async) function producing one.
 * @returns {Promise<*>} The awaited result of calling `params` when it is a
 *   function; `undefined` when `params` is falsy; otherwise `params` itself.
 */
async function resolve(params) {
  if (typeof params === "function") {
    return await params();
  }
  return params ? params : void 0;
}
export {
ChromaIndexerOptionsSchema,
IncludeEnum,
chroma,
chromaIndexer,
chromaIndexerRef,
chromaRetriever,
chromaRetrieverRef,
createChromaCollection,
deleteChromaCollection
};
//# sourceMappingURL=index.mjs.map