UNPKG

genkitx-chromadb

Version:

Genkit AI framework plugin for Chroma vector database.

243 lines 7.16 kB
import { ChromaClient, IncludeEnum } from "chromadb";
import { Document, indexerRef, retrieverRef, z } from "genkit";
import { genkitPlugin } from "genkit/plugin";
import { CommonRetrieverOptionsSchema } from "genkit/retriever";
import { Md5 } from "ts-md5";

// `where` / `whereDocument` filters are forwarded to Chroma unvalidated.
const WhereSchema = z.any();
const WhereDocumentSchema = z.any();

// Optional list of result fields the caller wants Chroma to return.
const IncludeOptionSchema = z
  .array(z.enum(["documents", "embeddings", "metadatas", "distances"]))
  .optional();

// Per-request retriever options: the common retriever options plus the
// Chroma-specific `include`, `where` and `whereDocument` fields.
const ChromaRetrieverOptionsSchema = CommonRetrieverOptionsSchema.extend({
  include: IncludeOptionSchema,
  where: WhereSchema.optional(),
  whereDocument: WhereDocumentSchema.optional(),
});

// The indexer accepts no options (null or omitted).
const ChromaIndexerOptionsSchema = z.null().optional();

/**
 * Genkit plugin that registers one retriever and one indexer per entry of
 * `params`, each named `chroma/<collectionName>`.
 *
 * @param {Array<object>} params - One configuration object per collection
 *   (see `chromaRetriever` / `chromaIndexer` for the fields that are read).
 * @returns The value returned by `genkitPlugin("chroma", ...)`.
 */
function chroma(params) {
  return genkitPlugin("chroma", async (ai) => {
    // All retrievers are registered before any indexer.
    for (const collectionParams of params) {
      chromaRetriever(ai, collectionParams);
    }
    for (const collectionParams of params) {
      chromaIndexer(ai, collectionParams);
    }
  });
}

/**
 * Builds a reference to the retriever registered for a collection.
 *
 * @param {{collectionName: string, displayName?: string}} params
 * @returns The value returned by `retrieverRef`.
 */
const chromaRetrieverRef = (params) => {
  return retrieverRef({
    name: `chroma/${params.collectionName}`,
    info: {
      label: params.displayName ?? `Chroma DB - ${params.collectionName}`,
    },
    configSchema: ChromaRetrieverOptionsSchema.optional(),
  });
};

/**
 * Builds a reference to the indexer registered for a collection.
 *
 * @param {{collectionName: string, displayName?: string}} params
 * @returns The value returned by `indexerRef`.
 */
const chromaIndexerRef = (params) => {
  return indexerRef({
    name: `chroma/${params.collectionName}`,
    info: {
      label: params.displayName ?? `Chroma DB - ${params.collectionName}`,
    },
    configSchema: ChromaIndexerOptionsSchema.optional(),
  });
};

/**
 * Creates a Chroma client from `params.clientParams` and opens the collection
 * named `params.collectionName`, creating it first when
 * `params.createCollectionIfMissing` is truthy.
 *
 * Private helper shared by the retriever and the indexer.
 *
 * @param {object} params - Collection configuration.
 * @returns {Promise<object>} The Chroma collection handle.
 */
async function openCollection(params) {
  const clientParams = await resolve(params.clientParams);
  const client = new ChromaClient(clientParams);
  const request = { name: params.collectionName };
  if (params.createCollectionIfMissing) {
    return await client.getOrCreateCollection(request);
  }
  return await client.getCollection(request);
}

/**
 * Defines the `chroma/<collectionName>` retriever on `ai`.
 *
 * For each request the query content is embedded with `params.embedder`, the
 * collection is queried with that single embedding, and every non-null hit is
 * converted back into a Genkit `Document` using the `dataType` and
 * `docMetadata` fields found in the hit's stored metadata.
 *
 * @param ai - Genkit instance providing `defineRetriever` and `embed`.
 * @param {object} params - Reads `embedder`, `embedderOptions`,
 *   `collectionName`, `clientParams` and `createCollectionIfMissing`.
 * @returns The value returned by `ai.defineRetriever`.
 */
function chromaRetriever(ai, params) {
  const { embedder, collectionName, embedderOptions } = params;
  return ai.defineRetriever(
    {
      name: `chroma/${collectionName}`,
      configSchema: ChromaRetrieverOptionsSchema.optional(),
    },
    async (content, options) => {
      const collection = await openCollection(params);
      const queryEmbeddings = await ai.embed({
        embedder,
        content,
        options: embedderOptions,
      });
      const results = await collection.query({
        nResults: options?.k,
        include: getIncludes(options?.include),
        where: options?.where,
        whereDocument: options?.whereDocument,
        queryEmbeddings: queryEmbeddings[0].embedding,
      });

      // Query results are grouped per query embedding. Only one embedding is
      // sent, so row 0 of each parallel array holds the hits. The previous
      // code took row 0 for documents only and indexed the other arrays by
      // hit index on the outer (per-query) axis, which threw as soon as a
      // second hit was returned.
      const documents = results.documents[0] ?? [];
      const metadatas = results.metadatas?.[0];
      const embeddings = results.embeddings?.[0];
      const distances = results.distances?.[0];

      const hits = [];
      documents.forEach((text, i) => {
        if (text === null) {
          return;
        }
        hits.push({
          document: text,
          metadata: constructMetadata(i, metadatas, embeddings, distances),
        });
      });

      return {
        documents: hits.map((hit) => {
          // Fields written by the indexer; absent when the hit carries no
          // stored metadata, in which case both values stay undefined.
          const stored = hit.metadata.metadata ?? {};
          const docMetadata = stored.docMetadata
            ? JSON.parse(stored.docMetadata)
            : undefined;
          return Document.fromData(
            hit.document,
            stored.dataType,
            docMetadata
          ).toJSON();
        }),
      };
    }
  );
}

/**
 * Computes the `include` list sent to Chroma.
 *
 * @param {string[]|undefined} includes - Fields requested by the caller.
 * @returns {string[]|undefined} `undefined` when the caller requested nothing
 *   (the client's default is used); otherwise the requested fields plus
 *   documents and metadatas, without duplicates.
 */
function getIncludes(includes) {
  if (!includes) {
    return undefined;
  }
  // The retriever rebuilds each Document from the stored text and from the
  // `dataType` / `docMetadata` metadata fields, so both must always come back.
  const required = [IncludeEnum.Documents, IncludeEnum.Metadatas];
  return [...new Set([...required, ...includes])];
}

/**
 * Collects the per-hit values available for hit `i`.
 *
 * @param {number} i - Index of the hit within the single query's result row.
 * @param {Array|null|undefined} metadatas - Metadata records of that row.
 * @param {Array|null|undefined} embeddings - Embeddings of that row.
 * @param {Array|null|undefined} distances - Distances of that row.
 * @returns {{metadata?: object, embedding?: *, distances?: *}} Only the keys
 *   whose source array was present are set.
 */
function constructMetadata(i, metadatas, embeddings, distances) {
  const fullMetadata = {};
  if (metadatas && metadatas[i]) {
    fullMetadata.metadata = metadatas[i];
  }
  if (embeddings) {
    fullMetadata.embedding = embeddings[i];
  }
  if (distances) {
    fullMetadata.distances = distances[i];
  }
  return fullMetadata;
}

/**
 * Defines the `chroma/<collectionName>` indexer on `ai`.
 *
 * Every document is embedded with `params.embedder`; each resulting embedding
 * becomes one Chroma entry whose id is the MD5 of the JSON-serialised
 * embedding document, and whose metadata stores the JSON-serialised document
 * metadata (`docMetadata`) and the data type (`dataType`, "" when unset).
 *
 * @param ai - Genkit instance providing `defineIndexer` and `embed`.
 * @param {object} params - Reads `embedder`, `embedderOptions`,
 *   `collectionName`, `clientParams` and `createCollectionIfMissing`.
 * @returns The value returned by `ai.defineIndexer`.
 */
function chromaIndexer(ai, params) {
  const { embedder, embedderOptions } = params;
  return ai.defineIndexer(
    {
      name: `chroma/${params.collectionName}`,
      configSchema: ChromaIndexerOptionsSchema,
    },
    async (docs) => {
      const collection = await openCollection(params);

      // One embed call per document, run concurrently; results keep the
      // order of `docs`.
      const embeddings = await Promise.all(
        docs.map((doc) =>
          ai.embed({ embedder, content: doc, options: embedderOptions })
        )
      );

      const entries = embeddings.flatMap((docEmbeddings, i) => {
        const embeddingDocs = docs[i].getEmbeddingDocuments(docEmbeddings);
        return docEmbeddings.map((docEmbedding, j) => {
          const embeddingDoc = embeddingDocs[j];
          return {
            id: Md5.hashStr(JSON.stringify(embeddingDoc)),
            value: docEmbedding.embedding,
            document: embeddingDoc.data,
            metadata: {
              docMetadata: JSON.stringify(embeddingDoc.metadata),
              dataType: embeddingDoc.dataType || "",
            },
          };
        });
      });

      await collection.add({
        ids: entries.map((e) => e.id),
        embeddings: entries.map((e) => e.value),
        metadatas: entries.map((e) => e.metadata),
        documents: entries.map((e) => e.document),
      });
    }
  );
}

/**
 * Creates a Chroma collection.
 *
 * When `params.embedder` is set, the collection is given an embedding
 * function that embeds each text through `ai.embed` and uses the first
 * embedding returned for it.
 *
 * @param ai - Genkit instance providing `embed`.
 * @param {object} params - Spread into `client.createCollection`; also reads
 *   `embedder`, `embedderOptions` and `clientParams`.
 * @returns {Promise<object>} The value returned by `client.createCollection`.
 */
async function createChromaCollection(ai, params) {
  const { embedder } = params;
  let chromaEmbedder;
  if (embedder) {
    chromaEmbedder = {
      async generate(texts) {
        const results = await Promise.all(
          texts.map((text) =>
            ai.embed({
              embedder,
              content: text,
              options: params.embedderOptions,
            })
          )
        );
        return results.map((result) => result[0].embedding);
      },
    };
  }
  const clientParams = await resolve(params.clientParams);
  const client = new ChromaClient(clientParams);
  return await client.createCollection({
    ...params,
    embeddingFunction: chromaEmbedder,
  });
}

/**
 * Deletes a Chroma collection.
 *
 * @param {object} params - Spread into `client.deleteCollection`; also reads
 *   `clientParams`.
 * @returns {Promise<*>} The value returned by `client.deleteCollection`.
 */
async function deleteChromaCollection(params) {
  const clientParams = await resolve(params.clientParams);
  const client = new ChromaClient(clientParams);
  return await client.deleteCollection({ ...params });
}

/**
 * Resolves client parameters that may be given directly or as a factory.
 *
 * @param {object|Function|undefined} params - A value, or a (possibly async)
 *   function producing it.
 * @returns {Promise<*>} `undefined` for a falsy input, the awaited call result
 *   for a function, otherwise the input itself.
 */
async function resolve(params) {
  if (!params) {
    return undefined;
  }
  if (typeof params === "function") {
    return await params();
  }
  return params;
}

export {
  ChromaIndexerOptionsSchema,
  IncludeEnum,
  chroma,
  chromaIndexer,
  chromaIndexerRef,
  chromaRetriever,
  chromaRetrieverRef,
  createChromaCollection,
  deleteChromaCollection,
};
//# sourceMappingURL=index.mjs.map