UNPKG

genkitx-astra-db

Version:

An Astra DB indexer and retriever for Genkit

194 lines 6.97 kB
import { CommonRetrieverOptionsSchema, indexerRef, retrieverRef } from "genkit/retriever";
import { genkitPlugin } from "genkit/plugin";
import { GenkitError, z } from "genkit";
import { Md5 } from "ts-md5";
import { DataAPIClient } from "@datastax/astra-db-ts";

// ---------------------------------------------------------------------------
// Down-levelled runtime helpers (object spread and async/await emulation).
// ---------------------------------------------------------------------------
const __defProp = Object.defineProperty;
const __getOwnPropSymbols = Object.getOwnPropertySymbols;
const __hasOwnProp = Object.prototype.hasOwnProperty;
const __propIsEnum = Object.prototype.propertyIsEnumerable;

/**
 * Sets `key` on `obj`. When the key is already reachable on the object
 * (own or inherited) it is (re)defined as a plain enumerable, configurable,
 * writable data property; otherwise an ordinary assignment is used.
 */
const __defNormalProp = (obj, key, value) => {
  if (key in obj) {
    return __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value });
  }
  return (obj[key] = value);
};

/**
 * Copies the own string-keyed properties and the own enumerable symbol-keyed
 * properties of `b` onto `a`, then returns `a`. A falsy `b` is treated as `{}`.
 */
const __spreadValues = (a, b) => {
  const source = b || {};
  for (const key in source) {
    if (__hasOwnProp.call(source, key)) {
      __defNormalProp(a, key, source[key]);
    }
  }
  if (__getOwnPropSymbols) {
    for (const symbol of __getOwnPropSymbols(source)) {
      if (__propIsEnum.call(source, symbol)) {
        __defNormalProp(a, symbol, source[symbol]);
      }
    }
  }
  return a;
};

/**
 * Drives a generator function as if it were an async function: every yielded
 * value is awaited via `Promise.resolve(...)` and fed back into the generator.
 * The returned promise resolves with the generator's return value and rejects
 * with anything thrown while stepping it.
 */
const __async = (__this, __arguments, generator) =>
  new Promise((resolve, reject) => {
    const iterator = generator.apply(__this, __arguments);
    const advance = (state) =>
      state.done
        ? resolve(state.value)
        : Promise.resolve(state.value).then(onFulfilled, onRejected);
    const onFulfilled = (value) => {
      try {
        advance(iterator.next(value));
      } catch (error) {
        reject(error);
      }
    };
    const onRejected = (reason) => {
      try {
        advance(iterator.throw(reason));
      } catch (error) {
        reject(error);
      }
    };
    advance(iterator.next());
  });

// ---------------------------------------------------------------------------
// Plugin constants and option schemas.
// ---------------------------------------------------------------------------
const PLUGIN_NAME = "astradb";
const DEFAULT_KEYSPACE = "default_keyspace";

/** Builds the retriever option schema: the common retriever options plus an optional `filter`. */
const createAstraDBRetrieverOptionsSchema = () =>
  CommonRetrieverOptionsSchema.extend({ filter: z.custom().optional() });

/** The indexer accepts no options of its own. */
const AstraDBIndexerOptionsSchema = z.object({});

/**
 * Computes the `name` / `info` fields shared by the retriever and indexer
 * references: the name is `astradb/<collectionName>` and the label falls back
 * to `Astra DB - <collectionName>` when `displayName` is null or undefined.
 */
const describeCollection = (params) => {
  const name = `${PLUGIN_NAME}/${params.collectionName}`;
  let label = params.displayName;
  if (label == null) {
    label = `Astra DB - ${params.collectionName}`;
  }
  return { name, info: { label } };
};

/**
 * Creates a retriever reference for the given collection.
 *
 * @param params - object with `collectionName` and an optional `displayName`.
 * @returns whatever `retrieverRef` returns for the computed name, label and schema.
 */
const astraDBRetrieverRef = (params) => {
  const { name, info } = describeCollection(params);
  return retrieverRef({ name, info, configSchema: createAstraDBRetrieverOptionsSchema() });
};

/**
 * Creates an indexer reference for the given collection.
 *
 * @param params - object with `collectionName` and an optional `displayName`.
 * @returns whatever `indexerRef` returns for the computed name, label and schema.
 */
const astraDBIndexerRef = (params) => {
  const { name, info } = describeCollection(params);
  return indexerRef({ name, info, configSchema: AstraDBIndexerOptionsSchema });
};

/**
 * Plugin entry point. For every entry of `params` it registers a retriever,
 * and then (in a second pass) an indexer.
 *
 * @param params - array of per-collection configuration objects.
 * @returns the value produced by `genkitPlugin` for the plugin name "astradb".
 */
function astraDB(params) {
  return genkitPlugin(PLUGIN_NAME, (ai) =>
    __async(this, null, function* () {
      // Two separate passes: all retrievers are registered before any indexer.
      params.forEach((collectionParams) => {
        configureAstraDBRetriever(ai, collectionParams);
      });
      params.forEach((collectionParams) => {
        configureAstraDBIndexer(ai, collectionParams);
      });
    })
  );
}

/**
 * Builds the collection handle used by both the retriever and the indexer.
 * Credentials come from `clientParams` when it is provided, otherwise from
 * `getDefaultConfig()` (which throws if the environment is incomplete). The
 * keyspace falls back to "default_keyspace".
 */
function openCollection(clientParams, collectionName) {
  const credentials = clientParams != null ? clientParams : getDefaultConfig();
  const { applicationToken, apiEndpoint } = credentials;

  let keyspace = clientParams == null ? undefined : clientParams.keyspace;
  if (keyspace == null) {
    keyspace = DEFAULT_KEYSPACE;
  }

  const client = new DataAPIClient(applicationToken);
  const db = client.db(apiEndpoint, { keyspace });
  return db.collection(collectionName);
}

/**
 * Registers a retriever named `astradb/<collectionName>` on `ai`.
 *
 * At query time the retriever embeds the query with `embedder` when one is
 * configured and sorts by `$vector`; without an embedder it sorts by
 * `$vectorize` using the query text.
 * NOTE(review): `$vectorize` presumably delegates embedding to Astra DB —
 * confirm against the Data API documentation.
 *
 * @param ai - object exposing `defineRetriever` and `embed` (presumably the Genkit instance).
 * @param params - `{ collectionName, embedder?, embedderOptions?, clientParams? }`.
 * @returns the value returned by `ai.defineRetriever`.
 */
function configureAstraDBRetriever(ai, params) {
  const { collectionName, embedder, embedderOptions } = params;
  const collection = openCollection(params.clientParams, collectionName);

  const retrieverConfig = {
    name: `${PLUGIN_NAME}/${collectionName}`,
    configSchema: createAstraDBRetrieverOptionsSchema().optional()
  };

  const toDocument = ({ text, metadata }) => ({ content: [{ text }], metadata });

  const retrieve = (content, options) =>
    __async(this, null, function* () {
      let queryEmbeddings = [];
      if (embedder) {
        queryEmbeddings = yield ai.embed({ embedder, content, options: embedderOptions });
      }

      const requestedFilter = options == null ? undefined : options.filter;
      const filter = requestedFilter != null ? requestedFilter : {};

      const requestedLimit = options == null ? undefined : options.k;
      const limit = requestedLimit != null ? requestedLimit : 5;

      let sort;
      if (queryEmbeddings.length > 0) {
        // Only the first embedding of the query is used for the similarity sort.
        sort = { $vector: queryEmbeddings[0].embedding };
      } else {
        sort = { $vectorize: content.text };
      }

      const results = yield collection.find(filter, { sort, limit }).toArray();
      return { documents: results.map(toDocument) };
    });

  return ai.defineRetriever(retrieverConfig, retrieve);
}

/**
 * Turns one source document and its embeddings into insertable records, one
 * per embedding. Each record's `_id` is the MD5 of the JSON-serialised
 * embedding document at the same index.
 */
function toEmbeddedRecords(doc, docEmbeddings) {
  const embeddingDocs = doc.getEmbeddingDocuments(docEmbeddings);
  return docEmbeddings.map((docEmbedding, position) => {
    const embeddingDoc = embeddingDocs[position];
    return {
      _id: Md5.hashStr(JSON.stringify(embeddingDoc)),
      text: embeddingDoc.data,
      $vector: docEmbedding.embedding,
      metadata: embeddingDoc.metadata,
      contentType: embeddingDoc.dataType
    };
  });
}

/**
 * Turns one source document into an insertable record that carries its text
 * in `$vectorize` instead of a precomputed vector.
 */
function toVectorizeRecord(doc) {
  const _id = Md5.hashStr(JSON.stringify(doc));
  const text = doc.text;
  return { _id, text, $vectorize: text, metadata: doc.metadata };
}

/**
 * Registers an indexer named `astradb/<collectionName>` on `ai`.
 *
 * With an `embedder`, all documents are embedded concurrently and one record
 * per embedding is inserted; without one, one record per document is inserted
 * with `$vectorize` set to the document text.
 *
 * @param ai - object exposing `defineIndexer` and `embed` (presumably the Genkit instance).
 * @param params - `{ collectionName, embedder?, embedderOptions?, clientParams? }`.
 * @returns the value returned by `ai.defineIndexer`.
 */
function configureAstraDBIndexer(ai, params) {
  const { collectionName, embedder, embedderOptions } = __spreadValues({}, params);
  const collection = openCollection(params.clientParams, collectionName);

  const indexerConfig = {
    name: `${PLUGIN_NAME}/${collectionName}`,
    configSchema: AstraDBIndexerOptionsSchema
  };

  const index = (docs) =>
    __async(this, null, function* () {
      let documents;
      if (embedder) {
        const pendingEmbeddings = docs.map((doc) =>
          ai.embed({ embedder, content: doc, options: embedderOptions })
        );
        const embeddings = yield Promise.all(pendingEmbeddings);
        // embeddings[i] belongs to docs[i]; Promise.all preserves input order.
        documents = embeddings.flatMap((docEmbeddings, position) =>
          toEmbeddedRecords(docs[position], docEmbeddings)
        );
      } else {
        documents = docs.map((doc) => toVectorizeRecord(doc));
      }
      yield collection.insertMany(documents);
    });

  return ai.defineIndexer(indexerConfig, index);
}

/**
 * Reads the Astra DB credentials from the environment.
 *
 * @returns `{ applicationToken, apiEndpoint }` taken from
 *   `ASTRA_DB_APPLICATION_TOKEN` and `ASTRA_DB_API_ENDPOINT`.
 * @throws {GenkitError} with status "INVALID_ARGUMENT" when either variable is
 *   unset or empty; the token is checked first.
 */
function getDefaultConfig() {
  const maybeApiKey = process.env.ASTRA_DB_APPLICATION_TOKEN;
  const maybeEndpoint = process.env.ASTRA_DB_API_ENDPOINT;

  const invalidArgument = (message) =>
    new GenkitError({ status: "INVALID_ARGUMENT", message, source: PLUGIN_NAME });

  if (!maybeApiKey) {
    throw invalidArgument(
      "Please pass in the API key or set ASTRA_DB_APPLICATION_TOKEN environment variable.\nFor more details see https://firebase.google.com/docs/genkit/plugins/astraDB"
    );
  }
  if (!maybeEndpoint) {
    throw invalidArgument(
      "Please pass in the Astra DB API endpoint or set ASTRA_DB_API_ENDPOINT environment variable.\nFor more details see https://firebase.google.com/docs/genkit/plugins/astraDB"
    );
  }

  return { applicationToken: maybeApiKey, apiEndpoint: maybeEndpoint };
}

export {
  astraDB,
  astraDBIndexerRef,
  astraDBRetrieverRef,
  configureAstraDBIndexer,
  configureAstraDBRetriever
};
//# sourceMappingURL=index.mjs.map