UNPKG

genkitx-astra-db

Version:

An Astra DB indexer and retriever for Genkit

216 lines 8.26 kB
"use strict"; var __defProp = Object.defineProperty; var __getOwnPropDesc = Object.getOwnPropertyDescriptor; var __getOwnPropNames = Object.getOwnPropertyNames; var __getOwnPropSymbols = Object.getOwnPropertySymbols; var __hasOwnProp = Object.prototype.hasOwnProperty; var __propIsEnum = Object.prototype.propertyIsEnumerable; var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value; var __spreadValues = (a, b) => { for (var prop in b || (b = {})) if (__hasOwnProp.call(b, prop)) __defNormalProp(a, prop, b[prop]); if (__getOwnPropSymbols) for (var prop of __getOwnPropSymbols(b)) { if (__propIsEnum.call(b, prop)) __defNormalProp(a, prop, b[prop]); } return a; }; var __export = (target, all) => { for (var name in all) __defProp(target, name, { get: all[name], enumerable: true }); }; var __copyProps = (to, from, except, desc) => { if (from && typeof from === "object" || typeof from === "function") { for (let key of __getOwnPropNames(from)) if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); } return to; }; var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); var __async = (__this, __arguments, generator) => { return new Promise((resolve, reject) => { var fulfilled = (value) => { try { step(generator.next(value)); } catch (e) { reject(e); } }; var rejected = (value) => { try { step(generator.throw(value)); } catch (e) { reject(e); } }; var step = (x) => x.done ? resolve(x.value) : Promise.resolve(x.value).then(fulfilled, rejected); step((generator = generator.apply(__this, __arguments)).next()); }); }; var src_exports = {}; __export(src_exports, { astraDB: () => astraDB, astraDBIndexerRef: () => astraDBIndexerRef, astraDBRetrieverRef: () => astraDBRetrieverRef, configureAstraDBIndexer: () => configureAstraDBIndexer, configureAstraDBRetriever: () => configureAstraDBRetriever }); module.exports = __toCommonJS(src_exports); var import_retriever = require("genkit/retriever"); var import_plugin = require("genkit/plugin"); var import_genkit = require("genkit"); var import_ts_md5 = require("ts-md5"); var import_astra_db_ts = require("@datastax/astra-db-ts"); const PLUGIN_NAME = "astradb"; const DEFAULT_KEYSPACE = "default_keyspace"; const createAstraDBRetrieverOptionsSchema = () => import_retriever.CommonRetrieverOptionsSchema.extend({ filter: import_genkit.z.custom().optional() }); const AstraDBIndexerOptionsSchema = import_genkit.z.object({}); const astraDBRetrieverRef = (params) => { var _a; return (0, import_retriever.retrieverRef)({ name: `${PLUGIN_NAME}/${params.collectionName}`, info: { label: (_a = params.displayName) != null ? _a : `Astra DB - ${params.collectionName}` }, configSchema: createAstraDBRetrieverOptionsSchema() }); }; const astraDBIndexerRef = (params) => { var _a; return (0, import_retriever.indexerRef)({ name: `${PLUGIN_NAME}/${params.collectionName}`, info: { label: (_a = params.displayName) != null ? _a : `Astra DB - ${params.collectionName}` }, configSchema: AstraDBIndexerOptionsSchema }); }; function astraDB(params) { return (0, import_plugin.genkitPlugin)(PLUGIN_NAME, (ai) => __async(this, null, function* () { params.forEach((i) => configureAstraDBRetriever(ai, i)); params.forEach((i) => configureAstraDBIndexer(ai, i)); })); } function configureAstraDBRetriever(ai, params) { var _a, _b, _c; const { collectionName, embedder, embedderOptions } = params; const { applicationToken, apiEndpoint } = (_a = params.clientParams) != null ? _a : getDefaultConfig(); const keyspace = (_c = (_b = params.clientParams) == null ? void 0 : _b.keyspace) != null ? _c : DEFAULT_KEYSPACE; const client = new import_astra_db_ts.DataAPIClient(applicationToken); const db = client.db(apiEndpoint, { keyspace }); const collection = db.collection(collectionName); return ai.defineRetriever( { name: `${PLUGIN_NAME}/${collectionName}`, configSchema: createAstraDBRetrieverOptionsSchema().optional() }, (content, options) => __async(this, null, function* () { var _a2, _b2; let queryEmbeddings = []; if (embedder) { queryEmbeddings = yield ai.embed({ embedder, content, options: embedderOptions }); } const filter = (_a2 = options == null ? void 0 : options.filter) != null ? _a2 : {}; const limit = (_b2 = options == null ? void 0 : options.k) != null ? _b2 : 5; const sort = queryEmbeddings.length > 0 ? { $vector: queryEmbeddings[0].embedding } : { $vectorize: content.text }; const cursor = collection.find(filter, { sort, limit }); const results = yield cursor.toArray(); const documents = results.map((result) => { const { text, metadata } = result; return { content: [{ text }], metadata }; }); return { documents }; }) ); } function configureAstraDBIndexer(ai, params) { var _a, _b, _c; const { collectionName, embedder, embedderOptions } = __spreadValues({}, params); const { applicationToken, apiEndpoint } = (_a = params.clientParams) != null ? _a : getDefaultConfig(); const keyspace = (_c = (_b = params.clientParams) == null ? void 0 : _b.keyspace) != null ? _c : DEFAULT_KEYSPACE; const client = new import_astra_db_ts.DataAPIClient(applicationToken); const db = client.db(apiEndpoint, { keyspace }); const collection = db.collection(collectionName); return ai.defineIndexer( { name: `${PLUGIN_NAME}/${collectionName}`, configSchema: AstraDBIndexerOptionsSchema }, (docs) => __async(this, null, function* () { let documents; if (embedder) { const embeddings = yield Promise.all( docs.map( (doc) => ai.embed({ embedder, content: doc, options: embedderOptions }) ) ); documents = embeddings.flatMap((value, i) => { const doc = docs[i]; const docEmbeddings = value; const embeddingDocs = doc.getEmbeddingDocuments(docEmbeddings); return docEmbeddings.map((docEmbedding, j) => { return { _id: import_ts_md5.Md5.hashStr(JSON.stringify(embeddingDocs[j])), text: embeddingDocs[j].data, $vector: docEmbedding.embedding, metadata: embeddingDocs[j].metadata, contentType: embeddingDocs[j].dataType }; }); }); } else { documents = docs.map((doc) => ({ _id: import_ts_md5.Md5.hashStr(JSON.stringify(doc)), text: doc.text, $vectorize: doc.text, metadata: doc.metadata })); } yield collection.insertMany(documents); }) ); } function getDefaultConfig() { const maybeApiKey = process.env.ASTRA_DB_APPLICATION_TOKEN; const maybeEndpoint = process.env.ASTRA_DB_API_ENDPOINT; if (!maybeApiKey) { throw new import_genkit.GenkitError({ status: "INVALID_ARGUMENT", message: "Please pass in the API key or set ASTRA_DB_APPLICATION_TOKEN environment variable.\nFor more details see https://firebase.google.com/docs/genkit/plugins/astraDB", source: PLUGIN_NAME }); } if (!maybeEndpoint) { throw new import_genkit.GenkitError({ status: "INVALID_ARGUMENT", message: "Please pass in the Astra DB API endpoint or set ASTRA_DB_API_ENDPOINT environment variable.\nFor more details see https://firebase.google.com/docs/genkit/plugins/astraDB", source: PLUGIN_NAME }); } return { applicationToken: maybeApiKey, apiEndpoint: maybeEndpoint }; } // Annotate the CommonJS export names for ESM import in node: 0 && (module.exports = { astraDB, astraDBIndexerRef, astraDBRetrieverRef, configureAstraDBIndexer, configureAstraDBRetriever }); //# sourceMappingURL=index.js.map