UNPKG

genkitx-chromadb

Version:

Genkit AI framework plugin for Chroma vector database.

267 lines 8.75 kB
"use strict";

// ---------------------------------------------------------------------------
// Bundler-generated CommonJS interop helpers. Kept verbatim: they build the
// getter-based `module.exports` object from `src_exports`.
// ---------------------------------------------------------------------------
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
var src_exports = {};
__export(src_exports, {
  ChromaIndexerOptionsSchema: () => ChromaIndexerOptionsSchema,
  IncludeEnum: () => import_chromadb.IncludeEnum,
  chroma: () => chroma,
  chromaIndexer: () => chromaIndexer,
  chromaIndexerRef: () => chromaIndexerRef,
  chromaRetriever: () => chromaRetriever,
  chromaRetrieverRef: () => chromaRetrieverRef,
  createChromaCollection: () => createChromaCollection,
  deleteChromaCollection: () => deleteChromaCollection
});
module.exports = __toCommonJS(src_exports);
var import_chromadb = require("chromadb");
var import_genkit = require("genkit");
var import_plugin = require("genkit/plugin");
var import_retriever = require("genkit/retriever");
var import_ts_md5 = require("ts-md5");

// ---------------------------------------------------------------------------
// Option schemas
// ---------------------------------------------------------------------------

// `where` / `whereDocument` filters are forwarded to Chroma unvalidated.
const WhereSchema = import_genkit.z.any();
const WhereDocumentSchema = import_genkit.z.any();
const IncludeOptionSchema = import_genkit.z.array(import_genkit.z.enum(["documents", "embeddings", "metadatas", "distances"])).optional();
const ChromaRetrieverOptionsSchema = import_retriever.CommonRetrieverOptionsSchema.extend({
  include: IncludeOptionSchema,
  where: WhereSchema.optional(),
  whereDocument: WhereDocumentSchema.optional()
});
const ChromaIndexerOptionsSchema = import_genkit.z.null().optional();

/**
 * Genkit plugin entry point: registers one retriever and one indexer for each
 * collection configuration.
 *
 * @param {Array<object>} params - Per-collection settings (`collectionName`,
 *   `embedder`, and optionally `embedderOptions`, `clientParams`,
 *   `createCollectionIfMissing`).
 * @returns The plugin object produced by `genkitPlugin("chroma", ...)`.
 */
function chroma(params) {
  return (0, import_plugin.genkitPlugin)("chroma", async (ai) => {
    // Same registration order as before: every retriever, then every indexer.
    for (const collectionParams of params) {
      chromaRetriever(ai, collectionParams);
    }
    for (const collectionParams of params) {
      chromaIndexer(ai, collectionParams);
    }
  });
}

/**
 * Builds a reference to the retriever registered as `chroma/<collectionName>`.
 *
 * @param {{collectionName: string, displayName?: string}} params
 * @returns The value returned by genkit's `retrieverRef`.
 */
const chromaRetrieverRef = (params) => {
  return (0, import_genkit.retrieverRef)({
    name: `chroma/${params.collectionName}`,
    info: {
      label: params.displayName ?? `Chroma DB - ${params.collectionName}`
    },
    configSchema: ChromaRetrieverOptionsSchema.optional()
  });
};

/**
 * Builds a reference to the indexer registered as `chroma/<collectionName>`.
 *
 * @param {{collectionName: string, displayName?: string}} params
 * @returns The value returned by genkit's `indexerRef`.
 */
const chromaIndexerRef = (params) => {
  return (0, import_genkit.indexerRef)({
    name: `chroma/${params.collectionName}`,
    info: {
      label: params.displayName ?? `Chroma DB - ${params.collectionName}`
    },
    configSchema: ChromaIndexerOptionsSchema.optional()
  });
};

/**
 * Creates a Chroma client from `params.clientParams` and opens the configured
 * collection, creating it first when `params.createCollectionIfMissing` is set.
 *
 * @param {object} params - Collection settings shared by retriever and indexer.
 * @returns {Promise<object>} The collection handle returned by the client.
 */
async function openCollection(params) {
  const clientParams = await resolve(params.clientParams);
  const client = new import_chromadb.ChromaClient(clientParams);
  const request = { name: params.collectionName };
  if (params.createCollectionIfMissing) {
    return await client.getOrCreateCollection(request);
  }
  return await client.getCollection(request);
}

/**
 * Defines the `chroma/<collectionName>` retriever on the given Genkit instance.
 *
 * The retriever embeds the query content, runs a single Chroma query and
 * rebuilds Genkit documents from the stored text plus the `dataType` and
 * `docMetadata` fields written by the indexer.
 *
 * @param {object} ai - Genkit instance (`defineRetriever` and `embed` are used).
 * @param {object} params - Collection settings (see `chroma`).
 * @returns The value returned by `ai.defineRetriever`.
 */
function chromaRetriever(ai, params) {
  const { embedder, collectionName, embedderOptions } = params;
  return ai.defineRetriever(
    {
      name: `chroma/${collectionName}`,
      configSchema: ChromaRetrieverOptionsSchema.optional()
    },
    async (content, options) => {
      const collection = await openCollection(params);
      const queryEmbeddings = await ai.embed({
        embedder,
        content,
        options: embedderOptions
      });
      const results = await collection.query({
        nResults: options?.k,
        include: getIncludes(options?.include),
        where: options?.where,
        whereDocument: options?.whereDocument,
        queryEmbeddings: queryEmbeddings[0].embedding
      });
      // Exactly one query embedding is sent, so only the first per-query slice
      // of each result field is relevant. Slicing here lets the per-document
      // index `i` address the matching metadata / embedding / distance; the
      // previous code indexed the query-level arrays with `i` and therefore
      // failed for every hit after the first.
      const documents = results.documents?.[0] ?? [];
      const metadatas = results.metadatas?.[0];
      const embeddings = results.embeddings?.[0];
      const distances = results.distances?.[0];
      const combined = documents.map((text, i) => {
        if (text === null) {
          return void 0;
        }
        return {
          document: text,
          metadata: constructMetadata(i, metadatas, embeddings, distances)
        };
      }).filter((hit) => !!hit);
      return {
        documents: combined.map((hit) => {
          // Tolerate a hit without a stored metadata record instead of throwing.
          const stored = hit.metadata.metadata ?? {};
          const docMetadata = stored.docMetadata ? JSON.parse(stored.docMetadata) : void 0;
          return import_genkit.Document.fromData(hit.document, stored.dataType, docMetadata).toJSON();
        })
      };
    }
  );
}

/**
 * Computes the `include` list sent to Chroma.
 *
 * @param {string[]|undefined} includes - Caller-requested fields.
 * @returns {string[]|undefined} `undefined` when the caller gave no list (the
 *   client's default applies); otherwise the de-duplicated list, always
 *   containing documents and metadatas because the retriever needs both to
 *   rebuild a document.
 */
function getIncludes(includes) {
  if (!includes) {
    return void 0;
  }
  const required = [
    import_chromadb.IncludeEnum.Documents,
    import_chromadb.IncludeEnum.Metadatas
  ];
  return Array.from(new Set([...required, ...includes]));
}

/**
 * Collects the per-hit fields Chroma returned for the document at index `i`.
 *
 * @param {number} i - Index of the hit within the first query's results.
 * @param {Array|null|undefined} metadatas - Metadata records for that query.
 * @param {Array|null|undefined} embeddings - Embeddings for that query.
 * @param {Array|null|undefined} distances - Distances for that query.
 * @returns {{metadata?: object, embedding?: *, distances?: *}} Only the fields
 *   whose source array is present.
 */
function constructMetadata(i, metadatas, embeddings, distances) {
  const fullMetadata = {};
  if (metadatas && metadatas[i]) {
    fullMetadata.metadata = metadatas[i];
  }
  if (embeddings) {
    fullMetadata.embedding = embeddings[i];
  }
  if (distances) {
    fullMetadata.distances = distances[i];
  }
  return fullMetadata;
}

/**
 * Defines the `chroma/<collectionName>` indexer on the given Genkit instance.
 *
 * Each document is embedded; every resulting embedding becomes one Chroma
 * entry whose id is the MD5 of the JSON-serialised embedding document, and
 * whose metadata carries the JSON-encoded document metadata and data type so
 * the retriever can rebuild the document.
 *
 * @param {object} ai - Genkit instance (`defineIndexer` and `embed` are used).
 * @param {object} params - Collection settings (see `chroma`).
 * @returns The value returned by `ai.defineIndexer`.
 */
function chromaIndexer(ai, params) {
  const { collectionName, embedder, embedderOptions } = params;
  return ai.defineIndexer(
    {
      name: `chroma/${collectionName}`,
      configSchema: ChromaIndexerOptionsSchema
    },
    async (docs) => {
      const collection = await openCollection(params);
      const embeddings = await Promise.all(
        docs.map(
          (doc) => ai.embed({
            embedder,
            content: doc,
            options: embedderOptions
          })
        )
      );
      // One entry per (document, embedding) pair, flattened into a single list.
      const entries = embeddings.flatMap((docEmbeddings, i) => {
        const embeddingDocs = docs[i].getEmbeddingDocuments(docEmbeddings);
        return docEmbeddings.map((docEmbedding, j) => {
          const embeddingDoc = embeddingDocs[j];
          return {
            id: import_ts_md5.Md5.hashStr(JSON.stringify(embeddingDoc)),
            value: docEmbedding.embedding,
            document: embeddingDoc.data,
            metadata: {
              docMetadata: JSON.stringify(embeddingDoc.metadata),
              dataType: embeddingDoc.dataType || ""
            }
          };
        });
      });
      if (entries.length === 0) {
        // Nothing to store; skip the round-trip to the server.
        return;
      }
      await collection.add({
        ids: entries.map((e) => e.id),
        embeddings: entries.map((e) => e.value),
        metadatas: entries.map((e) => e.metadata),
        documents: entries.map((e) => e.document)
      });
    }
  );
}

/**
 * Creates a Chroma collection. When `params.embedder` is given, the collection
 * gets an embedding function that delegates to `ai.embed` for each text.
 *
 * @param {object} ai - Genkit instance (`embed` is used).
 * @param {object} params - Spread into `client.createCollection`; also read
 *   for `embedder`, `embedderOptions` and `clientParams`.
 * @returns {Promise<object>} The result of `client.createCollection`.
 */
async function createChromaCollection(ai, params) {
  const embedder = params.embedder;
  let chromaEmbedder = void 0;
  if (embedder) {
    chromaEmbedder = {
      async generate(texts) {
        const results = await Promise.all(
          texts.map(
            (text) => ai.embed({
              embedder,
              content: text,
              options: params.embedderOptions
            })
          )
        );
        // Keep only the first embedding returned for each text.
        return results.map((result) => result[0].embedding);
      }
    };
  }
  const clientParams = await resolve(params.clientParams);
  const client = new import_chromadb.ChromaClient(clientParams);
  return await client.createCollection({
    ...params,
    embeddingFunction: chromaEmbedder
  });
}

/**
 * Deletes a Chroma collection.
 *
 * @param {object} params - Spread into `client.deleteCollection`; also read
 *   for `clientParams`.
 * @returns {Promise<*>} The result of `client.deleteCollection`.
 */
async function deleteChromaCollection(params) {
  const clientParams = await resolve(params.clientParams);
  const client = new import_chromadb.ChromaClient(clientParams);
  return await client.deleteCollection({
    ...params
  });
}

/**
 * Resolves client parameters that may be given directly or as a (possibly
 * async) factory function.
 *
 * @param {object|Function|undefined} params
 * @returns {Promise<object|undefined>} `undefined` for a falsy input, the
 *   awaited call result for a function, otherwise the value itself.
 */
async function resolve(params) {
  if (!params) {
    return void 0;
  }
  return typeof params === "function" ? await params() : params;
}
// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
  ChromaIndexerOptionsSchema,
  IncludeEnum,
  chroma,
  chromaIndexer,
  chromaIndexerRef,
  chromaRetriever,
  chromaRetrieverRef,
  createChromaCollection,
  deleteChromaCollection
});
//# sourceMappingURL=index.js.map