UNPKG

genkitx-chromadb

Version:

Genkit AI framework plugin for Chroma vector database.

1 lines 16.1 kB
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["/**\n * Copyright 2024 Google LLC\n *\n * Licensed under the Apache License, Version 2.0 (the \"License\");\n * you may not use this file except in compliance with the License.\n * You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport {\n ChromaClient,\n Collection,\n CollectionMetadata,\n Embeddings,\n IEmbeddingFunction,\n IncludeEnum,\n Metadata,\n ChromaClientParams as NativeChromaClientParams,\n Where,\n WhereDocument,\n} from 'chromadb';\nimport {\n Document,\n EmbedderArgument,\n Embedding,\n Genkit,\n indexerRef,\n retrieverRef,\n z,\n} from 'genkit';\nimport { GenkitPlugin, genkitPlugin } from 'genkit/plugin';\nimport { CommonRetrieverOptionsSchema } from 'genkit/retriever';\nimport { Md5 } from 'ts-md5';\n\nexport { IncludeEnum };\n\nconst WhereSchema: z.ZodType<Where> = z.any();\nconst WhereDocumentSchema: z.ZodType<WhereDocument> = z.any();\n\nconst IncludeOptionSchema = z\n .array(z.enum(['documents', 'embeddings', 'metadatas', 'distances']))\n .optional();\ntype IncludeOption = z.infer<typeof IncludeOptionSchema>;\n\nconst ChromaRetrieverOptionsSchema = CommonRetrieverOptionsSchema.extend({\n include: IncludeOptionSchema,\n where: WhereSchema.optional(),\n whereDocument: WhereDocumentSchema.optional(),\n});\n\nexport const ChromaIndexerOptionsSchema = z.null().optional();\n\ntype ChromaClientParams =\n | NativeChromaClientParams\n | (() => Promise<NativeChromaClientParams>);\n\ntype ChromaPluginParams<\n EmbedderCustomOptions extends z.ZodTypeAny = z.ZodTypeAny,\n> = {\n clientParams?: ChromaClientParams;\n collectionName: string;\n createCollectionIfMissing?: boolean;\n embedder: EmbedderArgument<EmbedderCustomOptions>;\n embedderOptions?: z.infer<EmbedderCustomOptions>;\n}[];\n\n/**\n * Chroma plugin that provides the Chroma retriever and indexer\n */\nexport function chroma<EmbedderCustomOptions extends z.ZodTypeAny>(\n params: ChromaPluginParams<EmbedderCustomOptions>\n): GenkitPlugin {\n return genkitPlugin('chroma', async (ai: Genkit) => {\n params.map((i) => chromaRetriever(ai, i));\n params.map((i) => chromaIndexer(ai, i));\n });\n}\n\nexport const chromaRetrieverRef = (params: {\n collectionName: string;\n displayName?: string;\n}) => {\n return retrieverRef({\n name: `chroma/${params.collectionName}`,\n info: {\n label: params.displayName ?? `Chroma DB - ${params.collectionName}`,\n },\n configSchema: ChromaRetrieverOptionsSchema.optional(),\n });\n};\n\nexport const chromaIndexerRef = (params: {\n collectionName: string;\n displayName?: string;\n}) => {\n return indexerRef({\n name: `chroma/${params.collectionName}`,\n info: {\n label: params.displayName ?? `Chroma DB - ${params.collectionName}`,\n },\n configSchema: ChromaIndexerOptionsSchema.optional(),\n });\n};\n\n/**\n * Configures a Chroma vector store retriever.\n */\nexport function chromaRetriever<EmbedderCustomOptions extends z.ZodTypeAny>(\n ai: Genkit,\n params: {\n clientParams?: ChromaClientParams;\n collectionName: string;\n createCollectionIfMissing?: boolean;\n embedder: EmbedderArgument<EmbedderCustomOptions>;\n embedderOptions?: z.infer<EmbedderCustomOptions>;\n }\n) {\n const { embedder, collectionName, embedderOptions } = params;\n return ai.defineRetriever(\n {\n name: `chroma/${collectionName}`,\n configSchema: ChromaRetrieverOptionsSchema.optional(),\n },\n async (content, options) => {\n const clientParams = await resolve(params.clientParams);\n const client = new ChromaClient(clientParams);\n let collection: Collection;\n if (params.createCollectionIfMissing) {\n collection = await client.getOrCreateCollection({\n name: collectionName,\n });\n } else {\n collection = await client.getCollection({\n name: collectionName,\n });\n }\n\n const queryEmbeddings = await ai.embed({\n embedder,\n content,\n options: embedderOptions,\n });\n const results = await collection.query({\n nResults: options?.k,\n include: getIncludes(options?.include),\n where: options?.where,\n whereDocument: options?.whereDocument,\n queryEmbeddings: queryEmbeddings[0].embedding,\n });\n\n const documents = results.documents[0];\n const metadatas = results.metadatas;\n const embeddings = results.embeddings;\n const distances = results.distances;\n\n const combined = documents\n .map((d, i) => {\n if (d !== null) {\n return {\n document: d,\n metadata: constructMetadata(i, metadatas, embeddings, distances),\n };\n }\n return undefined;\n })\n .filter(\n (r): r is { document: string; metadata: Record<string, any> } => !!r\n );\n\n return {\n documents: combined.map((result) => {\n const data = result.document;\n const metadata = result.metadata.metadata[0];\n const dataType = metadata.dataType;\n const docMetadata = metadata.docMetadata\n ? JSON.parse(metadata.docMetadata)\n : undefined;\n return Document.fromData(data, dataType, docMetadata).toJSON();\n }),\n };\n }\n );\n}\n\n/**\n * Helper method to compute effective Include enum. It always\n * includes documents\n */\nfunction getIncludes(includes: IncludeOption): IncludeEnum[] | undefined {\n if (!includes) {\n // Default behaviour\n return undefined;\n }\n\n // Always include documents\n let effectiveIncludes = [IncludeEnum.Documents];\n effectiveIncludes = effectiveIncludes.concat(includes as IncludeEnum[]);\n const includesSet = new Set(effectiveIncludes);\n return Array.from(includesSet);\n}\n\n/**\n * Helper method to construct metadata, including the optional {@link IncludeEnum} passed in config.\n */\nfunction constructMetadata(\n i: number,\n metadatas: (Metadata | null)[][],\n embeddings: Embeddings[] | null,\n distances: number[][] | null\n): unknown {\n var fullMetadata: Record<string, unknown> = {};\n if (metadatas && metadatas[i]) {\n fullMetadata.metadata = metadatas[i];\n }\n if (embeddings) {\n fullMetadata.embedding = embeddings[i];\n }\n if (distances) {\n fullMetadata.distances = distances[i];\n }\n return fullMetadata;\n}\n\n/**\n * Configures a Chroma indexer.\n */\nexport function chromaIndexer<EmbedderCustomOptions extends z.ZodTypeAny>(\n ai: Genkit,\n params: {\n clientParams?: ChromaClientParams;\n collectionName: string;\n createCollectionIfMissing?: boolean;\n embedder: EmbedderArgument<EmbedderCustomOptions>;\n embedderOptions?: z.infer<EmbedderCustomOptions>;\n }\n) {\n const { collectionName, embedder, embedderOptions } = {\n ...params,\n };\n\n return ai.defineIndexer(\n {\n name: `chroma/${params.collectionName}`,\n configSchema: ChromaIndexerOptionsSchema,\n },\n async (docs) => {\n const clientParams = await resolve(params.clientParams);\n const client = new ChromaClient(clientParams);\n\n let collection: Collection;\n if (params.createCollectionIfMissing) {\n collection = await client.getOrCreateCollection({\n name: collectionName,\n });\n } else {\n collection = await client.getCollection({\n name: collectionName,\n });\n }\n\n const embeddings = await Promise.all(\n docs.map((doc) =>\n ai.embed({\n embedder,\n content: doc,\n options: embedderOptions,\n })\n )\n );\n\n const entries = embeddings\n .map((value, i) => {\n const doc = docs[i];\n // The array of embeddings for this document\n const docEmbeddings: Embedding[] = value;\n const embeddingDocs = doc.getEmbeddingDocuments(docEmbeddings);\n return docEmbeddings.map((docEmbedding, j) => {\n const metadata: Metadata = {\n docMetadata: JSON.stringify(embeddingDocs[j].metadata),\n dataType: embeddingDocs[j].dataType || '',\n };\n\n const data = embeddingDocs[j].data;\n const id = Md5.hashStr(JSON.stringify(embeddingDocs[j]));\n return {\n id,\n value: docEmbedding.embedding,\n document: data,\n metadata,\n };\n });\n })\n .reduce((acc, val) => {\n return acc.concat(val);\n }, []);\n\n await collection.add({\n ids: entries.map((e) => e.id),\n embeddings: entries.map((e) => e.value),\n metadatas: entries.map((e) => e.metadata),\n documents: entries.map((e) => e.document),\n });\n }\n );\n}\n\n/**\n * Helper function for creating Chroma collections.\n * Currently only available for text\n * https://docs.trychroma.com/docs/embeddings/multimodal\n */\nexport async function createChromaCollection<\n EmbedderCustomOptions extends z.ZodTypeAny,\n>(\n ai: Genkit,\n params: {\n name: string;\n clientParams?: ChromaClientParams;\n metadata?: CollectionMetadata;\n embedder?: EmbedderArgument<EmbedderCustomOptions>;\n embedderOptions?: z.infer<EmbedderCustomOptions>;\n }\n) {\n let chromaEmbedder: IEmbeddingFunction | undefined = undefined;\n const embedder = params.embedder;\n if (!!embedder) {\n chromaEmbedder = {\n generate(texts: string[]) {\n return Promise.all(\n texts.map((text) =>\n ai.embed({\n embedder,\n content: text,\n options: params.embedderOptions,\n })\n )\n ).then((results: Embedding[][]) => {\n return results.map((result: Embedding[]) => result[0].embedding);\n });\n },\n };\n }\n const clientParams = await resolve(params.clientParams);\n const client = new ChromaClient(clientParams);\n return await client.createCollection({\n ...params,\n embeddingFunction: chromaEmbedder,\n });\n}\n\n/**\n * Helper function for deleting Chroma collections.\n */\nexport async function deleteChromaCollection(params: {\n name: string;\n clientParams?: ChromaClientParams;\n}) {\n const clientParams = await resolve(params.clientParams);\n const client = new ChromaClient(clientParams);\n return await client.deleteCollection({\n ...params,\n });\n}\n\nasync function resolve(\n params?: ChromaClientParams\n): Promise<NativeChromaClientParams | undefined> {\n if (!params) {\n return undefined;\n }\n if (typeof params === 'function') {\n return await params();\n }\n return params;\n}\n"],"mappings":"AAgBA;AAAA,EACE;AAAA,EAKA;AAAA,OAKK;AACP;AAAA,EACE;AAAA,EAIA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAuB,oBAAoB;AAC3C,SAAS,oCAAoC;AAC7C,SAAS,WAAW;AAIpB,MAAM,cAAgC,EAAE,IAAI;AAC5C,MAAM,sBAAgD,EAAE,IAAI;AAE5D,MAAM,sBAAsB,EACzB,MAAM,EAAE,KAAK,CAAC,aAAa,cAAc,aAAa,WAAW,CAAC,CAAC,EACnE,SAAS;AAGZ,MAAM,+BAA+B,6BAA6B,OAAO;AAAA,EACvE,SAAS;AAAA,EACT,OAAO,YAAY,SAAS;AAAA,EAC5B,eAAe,oBAAoB,SAAS;AAC9C,CAAC;AAEM,MAAM,6BAA6B,EAAE,KAAK,EAAE,SAAS;AAmBrD,SAAS,OACd,QACc;AACd,SAAO,aAAa,UAAU,OAAO,OAAe;AAClD,WAAO,IAAI,CAAC,MAAM,gBAAgB,IAAI,CAAC,CAAC;AACxC,WAAO,IAAI,CAAC,MAAM,cAAc,IAAI,CAAC,CAAC;AAAA,EACxC,CAAC;AACH;AAEO,MAAM,qBAAqB,CAAC,WAG7B;AACJ,SAAO,aAAa;AAAA,IAClB,MAAM,UAAU,OAAO,cAAc;AAAA,IACrC,MAAM;AAAA,MACJ,OAAO,OAAO,eAAe,eAAe,OAAO,cAAc;AAAA,IACnE;AAAA,IACA,cAAc,6BAA6B,SAAS;AAAA,EACtD,CAAC;AACH;AAEO,MAAM,mBAAmB,CAAC,WAG3B;AACJ,SAAO,WAAW;AAAA,IAChB,MAAM,UAAU,OAAO,cAAc;AAAA,IACrC,MAAM;AAAA,MACJ,OAAO,OAAO,eAAe,eAAe,OAAO,cAAc;AAAA,IACnE;AAAA,IACA,cAAc,2BAA2B,SAAS;AAAA,EACpD,CAAC;AACH;AAKO,SAAS,gBACd,IACA,QAOA;AACA,QAAM,EAAE,UAAU,gBAAgB,gBAAgB,IAAI;AACtD,SAAO,GAAG;AAAA,IACR;AAAA,MACE,MAAM,UAAU,cAAc;AAAA,MAC9B,cAAc,6BAA6B,SAAS;AAAA,IACtD;AAAA,IACA,OAAO,SAAS,YAAY;AAC1B,YAAM,eAAe,MAAM,QAAQ,OAAO,YAAY;AACtD,YAAM,SAAS,IAAI,aAAa,YAAY;AAC5C,UAAI;AACJ,UAAI,OAAO,2BAA2B;AACpC,qBAAa,MAAM,OAAO,sBAAsB;AAAA,UAC9C,MAAM;AAAA,QACR,CAAC;AAAA,MACH,OAAO;AACL,qBAAa,MAAM,OAAO,cAAc;AAAA,UACtC,MAAM;AAAA,QACR,CAAC;AAAA,MACH;AAEA,YAAM,kBAAkB,MAAM,GAAG,MAAM;AAAA,QACrC;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACX,CAAC;AACD,YAAM,UAAU,MAAM,WAAW,MAAM;AAAA,QACrC,UAAU,SAAS;AAAA,QACnB,SAAS,YAAY,SAAS,OAAO;AAAA,QACrC,OAAO,SAAS;AAAA,QAChB,eAAe,SAAS;AAAA,QACxB,iBAAiB,gBAAgB,CAAC,EAAE;AAAA,MACtC,CAAC;AAED,YAAM,YAAY,QAAQ,UAAU,CAAC;AACrC,YAAM,YAAY,QAAQ;AAC1B,YAAM,aAAa,QAAQ;AAC3B,YAAM,YAAY,QAAQ;AAE1B,YAAM,WAAW,UACd,IAAI,CAAC,GAAG,MAAM;AACb,YAAI,MAAM,MAAM;AACd,iBAAO;AAAA,YACL,UAAU;AAAA,YACV,UAAU,kBAAkB,GAAG,WAAW,YAAY,SAAS;AAAA,UACjE;AAAA,QACF;AACA,eAAO;AAAA,MACT,CAAC,EACA;AAAA,QACC,CAAC,MAAgE,CAAC,CAAC;AAAA,MACrE;AAEF,aAAO;AAAA,QACL,WAAW,SAAS,IAAI,CAAC,WAAW;AAClC,gBAAM,OAAO,OAAO;AACpB,gBAAM,WAAW,OAAO,SAAS,SAAS,CAAC;AAC3C,gBAAM,WAAW,SAAS;AAC1B,gBAAM,cAAc,SAAS,cACzB,KAAK,MAAM,SAAS,WAAW,IAC/B;AACJ,iBAAO,SAAS,SAAS,MAAM,UAAU,WAAW,EAAE,OAAO;AAAA,QAC/D,CAAC;AAAA,MACH;AAAA,IACF;AAAA,EACF;AACF;AAMA,SAAS,YAAY,UAAoD;AACvE,MAAI,CAAC,UAAU;AAEb,WAAO;AAAA,EACT;AAGA,MAAI,oBAAoB,CAAC,YAAY,SAAS;AAC9C,sBAAoB,kBAAkB,OAAO,QAAyB;AACtE,QAAM,cAAc,IAAI,IAAI,iBAAiB;AAC7C,SAAO,MAAM,KAAK,WAAW;AAC/B;AAKA,SAAS,kBACP,GACA,WACA,YACA,WACS;AACT,MAAI,eAAwC,CAAC;AAC7C,MAAI,aAAa,UAAU,CAAC,GAAG;AAC7B,iBAAa,WAAW,UAAU,CAAC;AAAA,EACrC;AACA,MAAI,YAAY;AACd,iBAAa,YAAY,WAAW,CAAC;AAAA,EACvC;AACA,MAAI,WAAW;AACb,iBAAa,YAAY,UAAU,CAAC;AAAA,EACtC;AACA,SAAO;AACT;AAKO,SAAS,cACd,IACA,QAOA;AACA,QAAM,EAAE,gBAAgB,UAAU,gBAAgB,IAAI;AAAA,IACpD,GAAG;AAAA,EACL;AAEA,SAAO,GAAG;AAAA,IACR;AAAA,MACE,MAAM,UAAU,OAAO,cAAc;AAAA,MACrC,cAAc;AAAA,IAChB;AAAA,IACA,OAAO,SAAS;AACd,YAAM,eAAe,MAAM,QAAQ,OAAO,YAAY;AACtD,YAAM,SAAS,IAAI,aAAa,YAAY;AAE5C,UAAI;AACJ,UAAI,OAAO,2BAA2B;AACpC,qBAAa,MAAM,OAAO,sBAAsB;AAAA,UAC9C,MAAM;AAAA,QACR,CAAC;AAAA,MACH,OAAO;AACL,qBAAa,MAAM,OAAO,cAAc;AAAA,UACtC,MAAM;AAAA,QACR,CAAC;AAAA,MACH;AAEA,YAAM,aAAa,MAAM,QAAQ;AAAA,QAC/B,KAAK;AAAA,UAAI,CAAC,QACR,GAAG,MAAM;AAAA,YACP;AAAA,YACA,SAAS;AAAA,YACT,SAAS;AAAA,UACX,CAAC;AAAA,QACH;AAAA,MACF;AAEA,YAAM,UAAU,WACb,IAAI,CAAC,OAAO,MAAM;AACjB,cAAM,MAAM,KAAK,CAAC;AAElB,cAAM,gBAA6B;AACnC,cAAM,gBAAgB,IAAI,sBAAsB,aAAa;AAC7D,eAAO,cAAc,IAAI,CAAC,cAAc,MAAM;AAC5C,gBAAM,WAAqB;AAAA,YACzB,aAAa,KAAK,UAAU,cAAc,CAAC,EAAE,QAAQ;AAAA,YACrD,UAAU,cAAc,CAAC,EAAE,YAAY;AAAA,UACzC;AAEA,gBAAM,OAAO,cAAc,CAAC,EAAE;AAC9B,gBAAM,KAAK,IAAI,QAAQ,KAAK,UAAU,cAAc,CAAC,CAAC,CAAC;AACvD,iBAAO;AAAA,YACL;AAAA,YACA,OAAO,aAAa;AAAA,YACpB,UAAU;AAAA,YACV;AAAA,UACF;AAAA,QACF,CAAC;AAAA,MACH,CAAC,EACA,OAAO,CAAC,KAAK,QAAQ;AACpB,eAAO,IAAI,OAAO,GAAG;AAAA,MACvB,GAAG,CAAC,CAAC;AAEP,YAAM,WAAW,IAAI;AAAA,QACnB,KAAK,QAAQ,IAAI,CAAC,MAAM,EAAE,EAAE;AAAA,QAC5B,YAAY,QAAQ,IAAI,CAAC,MAAM,EAAE,KAAK;AAAA,QACtC,WAAW,QAAQ,IAAI,CAAC,MAAM,EAAE,QAAQ;AAAA,QACxC,WAAW,QAAQ,IAAI,CAAC,MAAM,EAAE,QAAQ;AAAA,MAC1C,CAAC;AAAA,IACH;AAAA,EACF;AACF;AAOA,eAAsB,uBAGpB,IACA,QAOA;AACA,MAAI,iBAAiD;AACrD,QAAM,WAAW,OAAO;AACxB,MAAI,CAAC,CAAC,UAAU;AACd,qBAAiB;AAAA,MACf,SAAS,OAAiB;AACxB,eAAO,QAAQ;AAAA,UACb,MAAM;AAAA,YAAI,CAAC,SACT,GAAG,MAAM;AAAA,cACP;AAAA,cACA,SAAS;AAAA,cACT,SAAS,OAAO;AAAA,YAClB,CAAC;AAAA,UACH;AAAA,QACF,EAAE,KAAK,CAAC,YAA2B;AACjC,iBAAO,QAAQ,IAAI,CAAC,WAAwB,OAAO,CAAC,EAAE,SAAS;AAAA,QACjE,CAAC;AAAA,MACH;AAAA,IACF;AAAA,EACF;AACA,QAAM,eAAe,MAAM,QAAQ,OAAO,YAAY;AACtD,QAAM,SAAS,IAAI,aAAa,YAAY;AAC5C,SAAO,MAAM,OAAO,iBAAiB;AAAA,IACnC,GAAG;AAAA,IACH,mBAAmB;AAAA,EACrB,CAAC;AACH;AAKA,eAAsB,uBAAuB,QAG1C;AACD,QAAM,eAAe,MAAM,QAAQ,OAAO,YAAY;AACtD,QAAM,SAAS,IAAI,aAAa,YAAY;AAC5C,SAAO,MAAM,OAAO,iBAAiB;AAAA,IACnC,GAAG;AAAA,EACL,CAAC;AACH;AAEA,eAAe,QACb,QAC+C;AAC/C,MAAI,CAAC,QAAQ;AACX,WAAO;AAAA,EACT;AACA,MAAI,OAAO,WAAW,YAAY;AAChC,WAAO,MAAM,OAAO;AAAA,EACtB;AACA,SAAO;AACT;","names":[]}