UNPKG

genkitx-astra-db

Version:

An Astra DB indexer and retriever for Genkit

1 lines 11.1 kB
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["// Copyright DataStax, Inc.\n//\n// Licensed under the Apache License, Version 2.0 (the \"License\");\n// you may not use this file except in compliance with the License.\n// You may obtain a copy of the License at\n//\n// http://www.apache.org/licenses/LICENSE-2.0\n//\n// Unless required by applicable law or agreed to in writing, software\n// distributed under the License is distributed on an \"AS IS\" BASIS,\n// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n// See the License for the specific language governing permissions and\n// limitations under the License.\n\nimport { EmbedderArgument, Embedding } from \"genkit/embedder\";\nimport {\n CommonRetrieverOptionsSchema,\n indexerRef,\n retrieverRef,\n} from \"genkit/retriever\";\nimport { GenkitPlugin, genkitPlugin } from \"genkit/plugin\";\nimport { Genkit, GenkitError, z } from \"genkit\";\nimport { Md5 } from \"ts-md5\";\nimport { DataAPIClient, Filter, SomeDoc, Sort } from \"@datastax/astra-db-ts\";\n\ntype AstraDBClientOptions = {\n applicationToken: string;\n apiEndpoint: string;\n keyspace?: string;\n};\n\nconst PLUGIN_NAME = \"astradb\";\nconst DEFAULT_KEYSPACE = \"default_keyspace\";\n\nconst createAstraDBRetrieverOptionsSchema = <Schema extends SomeDoc>() =>\n CommonRetrieverOptionsSchema.extend({\n filter: z.custom<Filter<Schema>>().optional(),\n });\n\nconst AstraDBIndexerOptionsSchema = z.object({});\n\nexport const astraDBRetrieverRef = <Schema extends SomeDoc>(params: {\n collectionName: string;\n displayName?: string;\n}) => {\n return retrieverRef({\n name: `${PLUGIN_NAME}/${params.collectionName}`,\n info: {\n label: params.displayName ?? `Astra DB - ${params.collectionName}`,\n },\n configSchema: createAstraDBRetrieverOptionsSchema<Schema>(),\n });\n};\n\nexport const astraDBIndexerRef = (params: {\n collectionName: string;\n displayName?: string;\n}) => {\n return indexerRef({\n name: `${PLUGIN_NAME}/${params.collectionName}`,\n info: {\n label: params.displayName ?? `Astra DB - ${params.collectionName}`,\n },\n configSchema: AstraDBIndexerOptionsSchema,\n });\n};\n\nexport function astraDB<EmbedderCustomOptions extends z.ZodTypeAny>(\n params: {\n clientParams?: AstraDBClientOptions;\n collectionName: string;\n embedder?: EmbedderArgument<EmbedderCustomOptions>;\n embedderOptions?: z.infer<EmbedderCustomOptions>;\n }[]\n): GenkitPlugin {\n return genkitPlugin(PLUGIN_NAME, async (ai: Genkit) => {\n params.forEach((i) => configureAstraDBRetriever(ai, i));\n params.forEach((i) => configureAstraDBIndexer(ai, i));\n });\n}\n\nexport function configureAstraDBRetriever<\n Schema extends SomeDoc,\n EmbedderCustomOptions extends z.ZodTypeAny\n>(\n ai: Genkit,\n params: {\n clientParams?: AstraDBClientOptions;\n collectionName: string;\n embedder?: EmbedderArgument<EmbedderCustomOptions>;\n embedderOptions?: z.infer<EmbedderCustomOptions>;\n }\n) {\n const { collectionName, embedder, embedderOptions } = params;\n const { applicationToken, apiEndpoint } =\n params.clientParams ?? getDefaultConfig();\n const keyspace = params.clientParams?.keyspace ?? DEFAULT_KEYSPACE;\n\n const client = new DataAPIClient(applicationToken);\n const db = client.db(apiEndpoint, { keyspace });\n const collection = db.collection<Schema>(collectionName);\n\n return ai.defineRetriever(\n {\n name: `${PLUGIN_NAME}/${collectionName}`,\n configSchema: createAstraDBRetrieverOptionsSchema<Schema>().optional(),\n },\n async (content, options) => {\n let queryEmbeddings: Embedding[] = [];\n if (embedder) {\n queryEmbeddings = await ai.embed({\n embedder,\n content,\n options: embedderOptions,\n });\n }\n const filter = options?.filter ?? {};\n const limit = options?.k ?? 5;\n const sort: Sort =\n queryEmbeddings.length > 0\n ? { $vector: queryEmbeddings[0].embedding }\n : { $vectorize: content.text };\n\n const cursor = collection.find(filter, { sort, limit });\n const results = await cursor.toArray();\n const documents = results.map((result) => {\n const { text, metadata } = result;\n return { content: [{ text }], metadata };\n });\n return { documents };\n }\n );\n}\n\nexport function configureAstraDBIndexer<\n EmbedderCustomOptions extends z.ZodTypeAny\n>(\n ai: Genkit,\n params: {\n clientParams?: AstraDBClientOptions;\n collectionName: string;\n embedder?: EmbedderArgument<EmbedderCustomOptions>;\n embedderOptions?: z.infer<EmbedderCustomOptions>;\n }\n) {\n const { collectionName, embedder, embedderOptions } = {\n ...params,\n };\n const { applicationToken, apiEndpoint } =\n params.clientParams ?? getDefaultConfig();\n const keyspace = params.clientParams?.keyspace ?? DEFAULT_KEYSPACE;\n\n const client = new DataAPIClient(applicationToken);\n const db = client.db(apiEndpoint, { keyspace });\n const collection = db.collection(collectionName);\n\n return ai.defineIndexer(\n {\n name: `${PLUGIN_NAME}/${collectionName}`,\n configSchema: AstraDBIndexerOptionsSchema,\n },\n async (docs) => {\n let documents;\n\n if (embedder) {\n const embeddings = await Promise.all(\n docs.map((doc) =>\n ai.embed({\n embedder,\n content: doc,\n options: embedderOptions,\n })\n )\n );\n\n documents = embeddings.flatMap((value, i) => {\n const doc = docs[i];\n const docEmbeddings: Embedding[] = value;\n\n // Create one doc per docEmbedding so we can store them 1:1.\n // They should be unique because the embedding metadata is\n // added to the new docs.\n const embeddingDocs = doc.getEmbeddingDocuments(docEmbeddings);\n\n return docEmbeddings.map((docEmbedding, j) => {\n return {\n _id: Md5.hashStr(JSON.stringify(embeddingDocs[j])),\n text: embeddingDocs[j].data,\n $vector: docEmbedding.embedding,\n metadata: embeddingDocs[j].metadata,\n contentType: embeddingDocs[j].dataType,\n };\n });\n });\n } else {\n documents = docs.map((doc) => ({\n _id: Md5.hashStr(JSON.stringify(doc)),\n text: doc.text,\n $vectorize: doc.text,\n metadata: doc.metadata,\n }));\n }\n\n await collection.insertMany(documents);\n }\n );\n}\n\nfunction getDefaultConfig(): AstraDBClientOptions {\n const maybeApiKey = process.env.ASTRA_DB_APPLICATION_TOKEN;\n const maybeEndpoint = process.env.ASTRA_DB_API_ENDPOINT;\n if (!maybeApiKey) {\n throw new GenkitError({\n status: \"INVALID_ARGUMENT\",\n message:\n \"Please pass in the API key or set ASTRA_DB_APPLICATION_TOKEN environment variable.\\n\" +\n \"For more details see https://firebase.google.com/docs/genkit/plugins/astraDB\",\n source: PLUGIN_NAME,\n });\n }\n if (!maybeEndpoint) {\n throw new GenkitError({\n status: \"INVALID_ARGUMENT\",\n message:\n \"Please pass in the Astra DB API endpoint or set ASTRA_DB_API_ENDPOINT environment variable.\\n\" +\n \"For more details see https://firebase.google.com/docs/genkit/plugins/astraDB\",\n source: PLUGIN_NAME,\n });\n }\n return {\n applicationToken: maybeApiKey,\n apiEndpoint: maybeEndpoint,\n };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAeA;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAuB,oBAAoB;AAC3C,SAAiB,aAAa,SAAS;AACvC,SAAS,WAAW;AACpB,SAAS,qBAA4C;AAQrD,MAAM,cAAc;AACpB,MAAM,mBAAmB;AAEzB,MAAM,sCAAsC,MAC1C,6BAA6B,OAAO;AAAA,EAClC,QAAQ,EAAE,OAAuB,EAAE,SAAS;AAC9C,CAAC;AAEH,MAAM,8BAA8B,EAAE,OAAO,CAAC,CAAC;AAExC,MAAM,sBAAsB,CAAyB,WAGtD;AA5CN;AA6CE,SAAO,aAAa;AAAA,IAClB,MAAM,GAAG,WAAW,IAAI,OAAO,cAAc;AAAA,IAC7C,MAAM;AAAA,MACJ,QAAO,YAAO,gBAAP,YAAsB,cAAc,OAAO,cAAc;AAAA,IAClE;AAAA,IACA,cAAc,oCAA4C;AAAA,EAC5D,CAAC;AACH;AAEO,MAAM,oBAAoB,CAAC,WAG5B;AAzDN;AA0DE,SAAO,WAAW;AAAA,IAChB,MAAM,GAAG,WAAW,IAAI,OAAO,cAAc;AAAA,IAC7C,MAAM;AAAA,MACJ,QAAO,YAAO,gBAAP,YAAsB,cAAc,OAAO,cAAc;AAAA,IAClE;AAAA,IACA,cAAc;AAAA,EAChB,CAAC;AACH;AAEO,SAAS,QACd,QAMc;AACd,SAAO,aAAa,aAAa,CAAO,OAAe;AACrD,WAAO,QAAQ,CAAC,MAAM,0BAA0B,IAAI,CAAC,CAAC;AACtD,WAAO,QAAQ,CAAC,MAAM,wBAAwB,IAAI,CAAC,CAAC;AAAA,EACtD,EAAC;AACH;AAEO,SAAS,0BAId,IACA,QAMA;AA5FF;AA6FE,QAAM,EAAE,gBAAgB,UAAU,gBAAgB,IAAI;AACtD,QAAM,EAAE,kBAAkB,YAAY,KACpC,YAAO,iBAAP,YAAuB,iBAAiB;AAC1C,QAAM,YAAW,kBAAO,iBAAP,mBAAqB,aAArB,YAAiC;AAElD,QAAM,SAAS,IAAI,cAAc,gBAAgB;AACjD,QAAM,KAAK,OAAO,GAAG,aAAa,EAAE,SAAS,CAAC;AAC9C,QAAM,aAAa,GAAG,WAAmB,cAAc;AAEvD,SAAO,GAAG;AAAA,IACR;AAAA,MACE,MAAM,GAAG,WAAW,IAAI,cAAc;AAAA,MACtC,cAAc,oCAA4C,EAAE,SAAS;AAAA,IACvE;AAAA,IACA,CAAO,SAAS,YAAY;AA3GhC,UAAAA,KAAAC;AA4GM,UAAI,kBAA+B,CAAC;AACpC,UAAI,UAAU;AACZ,0BAAkB,MAAM,GAAG,MAAM;AAAA,UAC/B;AAAA,UACA;AAAA,UACA,SAAS;AAAA,QACX,CAAC;AAAA,MACH;AACA,YAAM,UAASD,MAAA,mCAAS,WAAT,OAAAA,MAAmB,CAAC;AACnC,YAAM,SAAQC,MAAA,mCAAS,MAAT,OAAAA,MAAc;AAC5B,YAAM,OACJ,gBAAgB,SAAS,IACrB,EAAE,SAAS,gBAAgB,CAAC,EAAE,UAAU,IACxC,EAAE,YAAY,QAAQ,KAAK;AAEjC,YAAM,SAAS,WAAW,KAAK,QAAQ,EAAE,MAAM,MAAM,CAAC;AACtD,YAAM,UAAU,MAAM,OAAO,QAAQ;AACrC,YAAM,YAAY,QAAQ,IAAI,CAAC,WAAW;AACxC,cAAM,EAAE,MAAM,SAAS,IAAI;AAC3B,eAAO,EAAE,SAAS,CAAC,EAAE,KAAK,CAAC,GAAG,SAAS;AAAA,MACzC,CAAC;AACD,aAAO,EAAE,UAAU;AAAA,IACrB;AAAA,EACF;AACF;AAEO,SAAS,wBAGd,IACA,QAMA;AAhJF;AAiJE,QAAM,EAAE,gBAAgB,UAAU,gBAAgB,IAAI,mBACjD;AAEL,QAAM,EAAE,kBAAkB,YAAY,KACpC,YAAO,iBAAP,YAAuB,iBAAiB;AAC1C,QAAM,YAAW,kBAAO,iBAAP,mBAAqB,aAArB,YAAiC;AAElD,QAAM,SAAS,IAAI,cAAc,gBAAgB;AACjD,QAAM,KAAK,OAAO,GAAG,aAAa,EAAE,SAAS,CAAC;AAC9C,QAAM,aAAa,GAAG,WAAW,cAAc;AAE/C,SAAO,GAAG;AAAA,IACR;AAAA,MACE,MAAM,GAAG,WAAW,IAAI,cAAc;AAAA,MACtC,cAAc;AAAA,IAChB;AAAA,IACA,CAAO,SAAS;AACd,UAAI;AAEJ,UAAI,UAAU;AACZ,cAAM,aAAa,MAAM,QAAQ;AAAA,UAC/B,KAAK;AAAA,YAAI,CAAC,QACR,GAAG,MAAM;AAAA,cACP;AAAA,cACA,SAAS;AAAA,cACT,SAAS;AAAA,YACX,CAAC;AAAA,UACH;AAAA,QACF;AAEA,oBAAY,WAAW,QAAQ,CAAC,OAAO,MAAM;AAC3C,gBAAM,MAAM,KAAK,CAAC;AAClB,gBAAM,gBAA6B;AAKnC,gBAAM,gBAAgB,IAAI,sBAAsB,aAAa;AAE7D,iBAAO,cAAc,IAAI,CAAC,cAAc,MAAM;AAC5C,mBAAO;AAAA,cACL,KAAK,IAAI,QAAQ,KAAK,UAAU,cAAc,CAAC,CAAC,CAAC;AAAA,cACjD,MAAM,cAAc,CAAC,EAAE;AAAA,cACvB,SAAS,aAAa;AAAA,cACtB,UAAU,cAAc,CAAC,EAAE;AAAA,cAC3B,aAAa,cAAc,CAAC,EAAE;AAAA,YAChC;AAAA,UACF,CAAC;AAAA,QACH,CAAC;AAAA,MACH,OAAO;AACL,oBAAY,KAAK,IAAI,CAAC,SAAS;AAAA,UAC7B,KAAK,IAAI,QAAQ,KAAK,UAAU,GAAG,CAAC;AAAA,UACpC,MAAM,IAAI;AAAA,UACV,YAAY,IAAI;AAAA,UAChB,UAAU,IAAI;AAAA,QAChB,EAAE;AAAA,MACJ;AAEA,YAAM,WAAW,WAAW,SAAS;AAAA,IACvC;AAAA,EACF;AACF;AAEA,SAAS,mBAAyC;AAChD,QAAM,cAAc,QAAQ,IAAI;AAChC,QAAM,gBAAgB,QAAQ,IAAI;AAClC,MAAI,CAAC,aAAa;AAChB,UAAM,IAAI,YAAY;AAAA,MACpB,QAAQ;AAAA,MACR,SACE;AAAA,MAEF,QAAQ;AAAA,IACV,CAAC;AAAA,EACH;AACA,MAAI,CAAC,eAAe;AAClB,UAAM,IAAI,YAAY;AAAA,MACpB,QAAQ;AAAA,MACR,SACE;AAAA,MAEF,QAAQ;AAAA,IACV,CAAC;AAAA,EACH;AACA,SAAO;AAAA,IACL,kBAAkB;AAAA,IAClB,aAAa;AAAA,EACf;AACF;","names":["_a","_b"]}