UNPKG

genkitx-neo4j

Version:

Genkit AI framework plugin for Neo4j Graph database

299 lines (277 loc) 9.26 kB
/** * Copyright 2024 Google LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import * as neo4j_driver from "neo4j-driver"; import { Genkit, z } from "genkit"; import { GenkitPlugin, genkitPlugin } from "genkit/plugin"; import { EmbedderArgument, Embedding } from "genkit/embedder"; import { CommonRetrieverOptionsSchema, Document, indexerRef, retrieverRef, } from "genkit/retriever"; const Neo4jRetrieverOptionsSchema = CommonRetrieverOptionsSchema.extend({ k: z.number().max(1000), // filter: z.record(z.string(), z.any()).optional(), later for metadata filtering }); const Neo4jIndexerOptionsSchema = z.object({ namespace: z.string().optional(), }); export interface Neo4jGraphConfig { url: string; username: string; password: string; database?: string; } /** * neo4jRetrieverRef function creates a retriever for Neo4j. * @param params The params for the new Neo4j retriever * @param params.indexId The indexId for the Neo4j retriever * @param params.displayName A display name for the retriever. If not specified, the default label will be `Neo4j - <indexId>` * @returns A reference to a Neo4j retriever. */ export const neo4jRetrieverRef = (params: { indexId: string; displayName?: string; }) => { return retrieverRef({ name: `neo4j/${params.indexId}`, info: { label: params.displayName ?? `Neo4j - ${params.indexId}`, }, configSchema: Neo4jRetrieverOptionsSchema, }); }; /** * neo4jIndexerRef function creates an indexer for Neo4j. * @param params The params for the new Neo4j indexer. * @param params.indexId The indexId for the Neo4j indexer. * @param params.displayName A display name for the indexer. If not specified, the default label will be `Neo4j - <indexId>` * @returns A reference to a Neo4j indexer. */ export const neo4jIndexerRef = (params: { indexId: string; displayName?: string; }) => { return indexerRef({ name: `neo4j/${params.indexId}`, info: { label: params.displayName ?? `Neo4j - ${params.indexId}`, }, //configSchema: Neo4jIndexerOptionsSchema.optional(), }); }; /** * Neo4j plugin that provides a Neo4j retriever and indexer * @param params An array of params to set up Neo4j retrievers and indexers * @param params.clientParams Neo4jConfiguration containing the username, password, and url. If not set, the NEO4J_URI, NEO4J_USERNAME, and NEO4J_PASSWORD environment variable will be used instead. * @param params.indexId The name of the index * @param params.embedder The embedder to use for the indexer and retriever * @param params.embedderOptions Options to customize the embedder * @returns The Neo4j Genkit plugin */ export function neo4j<EmbedderCustomOptions extends z.ZodTypeAny>( params: { clientParams?: Neo4jGraphConfig; indexId: string; embedder: EmbedderArgument<EmbedderCustomOptions>; embedderOptions?: z.infer<EmbedderCustomOptions>; }[], ): GenkitPlugin { return genkitPlugin("neo4j", async (ai: Genkit) => { params.map((i) => configureNeo4jRetriever(ai, i)); params.map((i) => configureNeo4jIndexer(ai, i)); }); } export default neo4j; /** * Configures a Neo4j retriever. * @param ai A Genkit instance * @param params The params for the retriever * @param params.indexId The name of the retriever * @param params.clientParams PNeo4jConfiguration containing the username, password, and url. If not set, the NEO4J_URI, NEO4J_USERNAME, and NEO4J_PASSWORD environment variable will be used instead. * @param params.embedder The embedder to use for the retriever * @param params.embedderOptions Options to customize the embedder * @returns A Pinecone retriever */ export function configureNeo4jRetriever< EmbedderCustomOptions extends z.ZodTypeAny, >( ai: Genkit, params: { indexId: string; clientParams?: Neo4jGraphConfig; embedder: EmbedderArgument<EmbedderCustomOptions>; embedderOptions?: z.infer<EmbedderCustomOptions>; }, ) { const { indexId, embedder, embedderOptions } = { ...params, }; const neo4jConfig = params.clientParams ?? getDefaultConfig(); const neo4j_instance = neo4j_driver.driver( neo4jConfig.url, // URL (protocol://host:port) neo4j_driver.auth.basic(neo4jConfig.username, neo4jConfig.password), // Authentication ); return ai.defineRetriever( { name: `neo4j/${params.indexId}`, configSchema: Neo4jRetrieverOptionsSchema, }, async (content, options) => { const queryEmbeddings = await ai.embed({ embedder, content, options: embedderOptions, }); const retriever_query = ` CALL db.index.vector.queryNodes($index, $k, $embedding) YIELD node, score RETURN node.text AS text, node {.*, text: Null, embedding: Null, id: Null } AS metadata `; const response = await neo4j_instance.executeQuery( retriever_query, { k: options.k, embedding: queryEmbeddings[0].embedding, index: indexId, }, { database: neo4jConfig.database, }, ); // Create documents properly by returning the result from map const documents = response.records.map((el) => { return Document.fromText( el.get("text"), Object.fromEntries( Object.entries(el.get("metadata")).filter( ([_, value]) => value !== null, ), ), ); }); neo4j_instance.close(); return { documents: documents }; }, ); } /** * Configures a Neo4j indexer. * @param ai A Genkit instance * @param params The params for the indexer * @param params.indexId The name of the indexer * @param params.clientParams Neo4jConfiguration containing the username, password, and url. If not set, the NEO4J_URI, NEO4J_USERNAME, and NEO4J_PASSWORD environment variable will be used instead. * @param params.embedder The embedder to use for the retriever * @param params.embedderOptions Options to customize the embedder * @returns A Genkit indexer */ export function configureNeo4jIndexer< EmbedderCustomOptions extends z.ZodTypeAny, >( ai: Genkit, params: { indexId: string; clientParams?: Neo4jGraphConfig; embedder: EmbedderArgument<EmbedderCustomOptions>; embedderOptions?: z.infer<EmbedderCustomOptions>; }, ) { const { indexId, embedder, embedderOptions } = { ...params, }; const neo4jConfig = params.clientParams ?? getDefaultConfig(); const neo4j_instance = neo4j_driver.driver( neo4jConfig.url, // URL (protocol://host:port) neo4j_driver.auth.basic(neo4jConfig.username, neo4jConfig.password), // Authentication ); return ai.defineIndexer( { name: `neo4j/${params.indexId}`, //configSchema: neo4jIndexerOptionsSchema.optional(), }, async (docs, options) => { const embeddings = await Promise.all( docs.map((doc) => ai.embed({ embedder, content: doc, options: embedderOptions, }), ), ); const BATCH_SIZE = 1000; for (let i = 0; i < docs.length; i += BATCH_SIZE) { const batchDocs = docs.slice(i, i + BATCH_SIZE); const batchEmbeddings = embeddings.slice(i, i + BATCH_SIZE); const batchParams = batchDocs.map((el, j) => ({ text: el.content[0]["text"], metadata: el.metadata ?? {}, embedding: batchEmbeddings[j][0]["embedding"], })); await neo4j_instance.executeQuery( ` UNWIND $data AS row CREATE (t:\`${indexId}\`) SET t.text = row.text, t += row.metadata WITH t, row.embedding AS embedding CALL db.create.setNodeVectorProperty(t, 'embedding', embedding) `, { data: batchParams }, { database: neo4jConfig.database }, ); } await neo4j_instance.executeQuery( ` CREATE VECTOR INDEX $indexName IF NOT EXISTS FOR (n:\`${indexId}\`) ON n.embedding `, { indexName: indexId }, { database: neo4jConfig.database }, ); neo4j_instance.close(); }, ); } function getDefaultConfig() { const { NEO4J_URI: url, NEO4J_USERNAME: username, NEO4J_PASSWORD: password, NEO4J_DATABASE: database, } = process.env; if (!url || !username || !password) { throw new Error( "Please provide Neo4j connection details through environment variables: NEO4J_URI, NEO4J_USERNAME, and NEO4J_PASSWORD are required.\n" + "For more details see https://neo4j.com/docs/api/javascript-driver/current/", ); } return { url, username, password, ...(database && { database }), }; }