UNPKG

genkitx-cloud-sql-pg

Version:

Genkit AI framework plugin for Cloud SQL for PostgreSQL.

317 lines 12.5 kB
"use strict"; var __defProp = Object.defineProperty; var __getOwnPropDesc = Object.getOwnPropertyDescriptor; var __getOwnPropNames = Object.getOwnPropertyNames; var __hasOwnProp = Object.prototype.hasOwnProperty; var __export = (target, all) => { for (var name in all) __defProp(target, name, { get: all[name], enumerable: true }); }; var __copyProps = (to, from, except, desc) => { if (from && typeof from === "object" || typeof from === "function") { for (let key of __getOwnPropNames(from)) if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); } return to; }; var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); var index_exports = {}; __export(index_exports, { Column: () => import_engine2.Column, DistanceStrategy: () => import_indexes2.DistanceStrategy, ExactNearestNeighbor: () => import_indexes2.ExactNearestNeighbor, HNSWIndex: () => import_indexes2.HNSWIndex, HNSWQueryOptions: () => import_indexes2.HNSWQueryOptions, IVFFlatIndex: () => import_indexes2.IVFFlatIndex, IVFFlatQueryOptions: () => import_indexes2.IVFFlatQueryOptions, PostgresEngine: () => import_engine2.PostgresEngine, configurePostgresIndexer: () => configurePostgresIndexer, configurePostgresRetriever: () => configurePostgresRetriever, default: () => index_default, postgres: () => postgres, postgresIndexerRef: () => postgresIndexerRef, postgresRetrieverRef: () => postgresRetrieverRef }); module.exports = __toCommonJS(index_exports); var import_genkit = require("genkit"); var import_plugin = require("genkit/plugin"); var import_retriever = require("genkit/retriever"); var import_uuid = require("uuid"); var import_indexes = require("./indexes.js"); var import_engine2 = require("./engine.js"); var import_indexes2 = require("./indexes.js"); const PostgresRetrieverOptionsSchema = import_retriever.CommonRetrieverOptionsSchema.extend({ k: import_genkit.z.number().max(1e3), filter: import_genkit.z.string().optional() }); const PostgresIndexerOptionsSchema = import_genkit.z.object({ batchSize: import_genkit.z.number().default(100) }); const postgresRetrieverRef = (params) => { return (0, import_retriever.retrieverRef)({ name: `postgres/${params.tableName}`, info: { label: params.tableName ?? `Postgres - ${params.tableName}` }, configSchema: PostgresRetrieverOptionsSchema }); }; const postgresIndexerRef = (params) => { return (0, import_retriever.indexerRef)({ name: `postgres/${params.tableName}`, info: { label: params.tableName ?? `Postgres - ${params.tableName}` }, configSchema: PostgresIndexerOptionsSchema.optional() }); }; function postgres(params) { return (0, import_plugin.genkitPlugin)("postgres", async (ai) => { params.map((i) => configurePostgresRetriever(ai, i)); params.map((i) => configurePostgresIndexer(ai, i)); }); } var index_default = postgres; async function configurePostgresRetriever(ai, params) { const schemaName = params.schemaName ?? "public"; const contentColumn = params.contentColumn ?? "content"; const embeddingColumn = params.embeddingColumn ?? "embedding"; const distanceStrategy = params.distanceStrategy ?? import_indexes.DistanceStrategy.COSINE_DISTANCE; if (!params.engine) { throw new Error("Engine is required"); } async function checkColumns() { if (params.metadataColumns !== void 0 && params.ignoreMetadataColumns !== void 0) { throw "Can not use both metadata_columns and ignore_metadata_columns."; } const { rows } = await params.engine.pool.raw( `SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '${params.tableName}' AND table_schema = '${schemaName}'` ); const columns = {}; for (const index in rows) { const row = rows[index]; columns[row["column_name"]] = row["data_type"]; } if (params.idColumn && !columns.hasOwnProperty(params.idColumn)) { throw `Id column: ${params.idColumn}, does not exist.`; } if (contentColumn && !columns.hasOwnProperty(contentColumn)) { throw `Content column: ${params.contentColumn}, does not exist.`; } const contentType = contentColumn ? columns[contentColumn] : ""; if (contentType !== "text" && !contentType.includes("char")) { throw `Content column: ${params.contentColumn}, is type: ${contentType}. It must be a type of character string.`; } if (embeddingColumn && !columns.hasOwnProperty(embeddingColumn)) { throw `Embedding column: ${embeddingColumn}, does not exist.`; } if (embeddingColumn && columns[embeddingColumn] !== "USER-DEFINED") { throw `Embedding column: ${embeddingColumn} is not of type Vector.`; } const metadataJsonColumnToCheck = params.metadataJsonColumn ?? ""; params.metadataJsonColumn = columns.hasOwnProperty( metadataJsonColumnToCheck ) ? params.metadataJsonColumn : ""; if (params.metadataColumns) { for (const column of params.metadataColumns) { if (column && !columns.hasOwnProperty(column)) { throw `Metadata column: ${column}, does not exist.`; } } } const allColumns = columns; if (params.ignoreMetadataColumns !== void 0 && params.ignoreMetadataColumns.length > 0) { for (const column of params.ignoreMetadataColumns) { delete allColumns[column]; } if (params.idColumn) { delete allColumns[params.idColumn]; } if (contentColumn) { delete allColumns[contentColumn]; } if (embeddingColumn) { delete allColumns[embeddingColumn]; } params.metadataColumns = Object.keys(allColumns); } } async function queryCollection(embedding, k, filter) { k = k ?? 4; const operator = distanceStrategy.operator; const searchFunction = distanceStrategy.searchFunction; const _filter = filter !== void 0 ? `WHERE ${filter}` : ""; const metadataColNames = params.metadataColumns && params.metadataColumns.length > 0 ? `"${params.metadataColumns.join('","')}"` : ""; const metadataJsonColName = params.metadataJsonColumn ? `, "${params.metadataJsonColumn}"` : ""; const query = `SELECT "${params.idColumn}", "${contentColumn}", "${embeddingColumn}", ${metadataColNames} ${metadataJsonColName}, ${searchFunction}("${embeddingColumn}", '[${embedding}]') as distance FROM "${schemaName}"."${params.tableName}" ${_filter} ORDER BY "${embeddingColumn}" ${operator} '[${embedding}]' LIMIT ${k};`; if (params.indexQueryOptions) { await params.engine.pool.raw( `SET LOCAL ${params.indexQueryOptions.to_string()}` ); } const { rows } = await params.engine.pool.raw(query); return rows; } return ai.defineRetriever( { name: `postgres/${params.tableName}`, configSchema: PostgresRetrieverOptionsSchema }, async (content, options) => { console.log(`Retrieving data for table: ${params.tableName}`); checkColumns(); const queryEmbeddings = await ai.embed({ embedder: params.embedder, content, options: params.embedderOptions }); const embedding = queryEmbeddings[0].embedding; const results = await queryCollection( embedding, options.k, options.filter ); const documents = []; for (const row of results) { const metadata = params.metadataJsonColumn && row[params.metadataJsonColumn] ? row[params.metadataJsonColumn] : {}; if (params.metadataColumns) { for (const col of params.metadataColumns) { metadata[col] = row[col]; } } documents.push( new import_retriever.Document({ content: row[contentColumn], metadata }) ); } return { documents }; } ); } function configurePostgresIndexer(ai, params) { const schemaName = params.schemaName ?? "public"; const contentColumn = params.contentColumn ?? "content"; const embeddingColumn = params.embeddingColumn ?? "embedding"; const idColumn = params.idColumn ?? "id"; const metadataJsonColumn = params.metadataJsonColumn ?? "metadata"; if (!params.engine) { throw new Error("Engine is required"); } if (params.metadataColumns && params.ignoreMetadataColumns) { throw new Error( "Cannot use both metadataColumns and ignoreMetadataColumns" ); } async function checkColumns() { const { rows } = await params.engine.pool.raw( `SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '${params.tableName}' AND table_schema = '${schemaName}'` ); const columns = {}; for (const index in rows) { const row = rows[index]; columns[row["column_name"]] = row["data_type"]; } if (!columns.hasOwnProperty(idColumn)) { throw new Error(`Id column: ${idColumn}, does not exist.`); } if (!columns.hasOwnProperty(contentColumn)) { throw new Error(`Content column: ${contentColumn}, does not exist.`); } if (!columns.hasOwnProperty(embeddingColumn)) { throw new Error(`Embedding column: ${embeddingColumn}, does not exist.`); } if (columns[embeddingColumn] !== "USER-DEFINED") { throw new Error( `Embedding column: ${embeddingColumn} is not of type Vector.` ); } if (params.metadataColumns) { for (const column of params.metadataColumns) { if (column && !columns.hasOwnProperty(column)) { throw new Error(`Metadata column: ${column}, does not exist.`); } } } } return ai.defineIndexer( { name: `postgres/${params.tableName}`, configSchema: PostgresIndexerOptionsSchema.optional() }, async (docs, options) => { try { await checkColumns(); const documents = Array.isArray(docs) ? docs : docs.documents || []; const mergedOptions = Array.isArray(docs) ? options : docs.options || options || {}; const batchSize = mergedOptions.batchSize || 100; console.log( `Indexing ${documents.length} documents in batches of ${batchSize}` ); for (let i = 0; i < documents.length; i += batchSize) { const chunk = documents.slice(i, i + batchSize); const texts = chunk.map( (doc) => Array.isArray(doc.content) ? doc.content.map((c) => c.text).join(" ") : doc.content ); let embeddings; try { if (ai.embedMany) { embeddings = await ai.embedMany({ embedder: params.embedder, content: texts, options: params.embedderOptions }); } else { embeddings = await Promise.all( texts.map( (text) => ai.embed({ embedder: params.embedder, content: text, options: params.embedderOptions }).then((res) => res[0]) ) ); } } catch (error) { throw new Error("Embedding failed", { cause: error }); } const insertData = chunk.map((doc, index) => ({ [idColumn]: doc.metadata?.[idColumn] || (0, import_uuid.v4)(), [contentColumn]: texts[index], [embeddingColumn]: JSON.stringify(embeddings[index].embedding), ...metadataJsonColumn && { [metadataJsonColumn]: doc.metadata || {} }, ...Object.fromEntries( (params.metadataColumns || []).filter((col) => doc.metadata?.[col] !== void 0).map((col) => [col, doc.metadata?.[col]]) ) })); const table = schemaName ? params.engine.pool.withSchema(schemaName).table(params.tableName) : params.engine.pool.table(params.tableName); await table.insert(insertData); } } catch (error) { console.error("Error in indexer:", error); throw error; } } ); } // Annotate the CommonJS export names for ESM import in node: 0 && (module.exports = { Column, DistanceStrategy, ExactNearestNeighbor, HNSWIndex, HNSWQueryOptions, IVFFlatIndex, IVFFlatQueryOptions, PostgresEngine, configurePostgresIndexer, configurePostgresRetriever, postgres, postgresIndexerRef, postgresRetrieverRef }); //# sourceMappingURL=index.js.map