// genkitx-neo4j
// Version:
// Genkit AI framework plugin for the Neo4j graph database
// 299 lines (277 loc) • 9.26 kB
// text/typescript
/**
* Copyright 2024 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import * as neo4j_driver from "neo4j-driver";
import { Genkit, z } from "genkit";
import { GenkitPlugin, genkitPlugin } from "genkit/plugin";
import { EmbedderArgument, Embedding } from "genkit/embedder";
import {
CommonRetrieverOptionsSchema,
Document,
indexerRef,
retrieverRef,
} from "genkit/retriever";
/**
 * Options schema for the Neo4j retriever: Genkit's common retriever options
 * extended with `k`, declared as a number no greater than 1000.
 * The retriever in this file forwards `k` as the `$k` argument of
 * `db.index.vector.queryNodes`.
 */
const Neo4jRetrieverOptionsSchema = CommonRetrieverOptionsSchema.extend({
k: z.number().max(1000),
// Planned but not enabled: metadata filtering, e.g.
// filter: z.record(z.string(), z.any()).optional()
});
/**
 * Options schema for the Neo4j indexer, with a single optional `namespace`
 * string.
 * NOTE(review): no active code in this file references this schema; the
 * `configSchema` lines that would use it are commented out.
 */
const Neo4jIndexerOptionsSchema = z.object({
namespace: z.string().optional(),
});
/**
 * Connection settings used to open a Neo4j driver and run queries.
 */
export interface Neo4jGraphConfig {
/** Connection URL handed to the driver (protocol://host:port). */
url: string;
/** User name for basic authentication. */
username: string;
/** Password for basic authentication. */
password: string;
/** Optional database name forwarded with every query issued by this plugin. */
database?: string;
}
/**
 * Builds a reference to the Neo4j retriever registered for an index.
 *
 * @param params Settings for the reference.
 * @param params.indexId Index identifier; the reference is named
 *   `neo4j/<indexId>`.
 * @param params.displayName Optional label for the retriever. When omitted,
 *   the label is `Neo4j - <indexId>`.
 * @returns A retriever reference configured with
 *   `Neo4jRetrieverOptionsSchema`.
 */
export const neo4jRetrieverRef = (params: {
  indexId: string;
  displayName?: string;
}) => {
  const { indexId, displayName } = params;
  const label = displayName ?? `Neo4j - ${indexId}`;

  return retrieverRef({
    name: `neo4j/${indexId}`,
    info: { label },
    configSchema: Neo4jRetrieverOptionsSchema,
  });
};
/**
 * Builds a reference to the Neo4j indexer registered for an index.
 *
 * @param params Settings for the reference.
 * @param params.indexId Index identifier; the reference is named
 *   `neo4j/<indexId>`.
 * @param params.displayName Optional label for the indexer. When omitted,
 *   the label is `Neo4j - <indexId>`.
 * @returns An indexer reference (no config schema is attached).
 */
export const neo4jIndexerRef = (params: {
  indexId: string;
  displayName?: string;
}) => {
  const { indexId, displayName } = params;
  const label = displayName ?? `Neo4j - ${indexId}`;

  return indexerRef({
    name: `neo4j/${indexId}`,
    info: { label },
    // Not enabled: configSchema: Neo4jIndexerOptionsSchema.optional(),
  });
};
/**
 * Neo4j plugin for Genkit. For every entry in `params` it registers a
 * retriever and an indexer; all retrievers are registered first, then all
 * indexers.
 *
 * @param params One entry per index to expose.
 * @param params.clientParams Neo4j connection settings (url, username,
 *   password, optional database). When omitted, the NEO4J_URI,
 *   NEO4J_USERNAME and NEO4J_PASSWORD environment variables are used.
 * @param params.indexId The name of the index.
 * @param params.embedder The embedder used by both the indexer and the
 *   retriever.
 * @param params.embedderOptions Options passed to the embedder.
 * @returns The Neo4j Genkit plugin.
 */
export function neo4j<EmbedderCustomOptions extends z.ZodTypeAny>(
  params: {
    clientParams?: Neo4jGraphConfig;
    indexId: string;
    embedder: EmbedderArgument<EmbedderCustomOptions>;
    embedderOptions?: z.infer<EmbedderCustomOptions>;
  }[],
): GenkitPlugin {
  return genkitPlugin("neo4j", async (ai: Genkit) => {
    params.forEach((entry) => {
      configureNeo4jRetriever(ai, entry);
    });
    params.forEach((entry) => {
      configureNeo4jIndexer(ai, entry);
    });
  });
}

export default neo4j;
/**
 * Configures a Neo4j retriever and registers it as `neo4j/<indexId>`.
 *
 * On each retrieval the query content is embedded, the vector index named
 * `indexId` is searched through `db.index.vector.queryNodes`, and each
 * matching node is turned into a document: `node.text` becomes the document
 * text and the remaining non-null node properties (excluding `text`,
 * `embedding` and `id`) become its metadata.
 *
 * @param ai A Genkit instance.
 * @param params The params for the retriever.
 * @param params.indexId The name of the index to query (also the retriever name suffix).
 * @param params.clientParams Neo4j connection settings (url, username,
 *   password, optional database). When omitted, the NEO4J_URI,
 *   NEO4J_USERNAME and NEO4J_PASSWORD environment variables are used.
 * @param params.embedder The embedder to use for the retriever.
 * @param params.embedderOptions Options to customize the embedder.
 * @returns A Neo4j retriever.
 * @throws Error at configuration time when `clientParams` is omitted and the
 *   required environment variables are missing.
 */
export function configureNeo4jRetriever<
  EmbedderCustomOptions extends z.ZodTypeAny,
>(
  ai: Genkit,
  params: {
    indexId: string;
    clientParams?: Neo4jGraphConfig;
    embedder: EmbedderArgument<EmbedderCustomOptions>;
    embedderOptions?: z.infer<EmbedderCustomOptions>;
  },
) {
  const { indexId, embedder, embedderOptions } = params;
  // Resolved eagerly so missing connection settings are reported when the
  // retriever is configured rather than on first use.
  const neo4jConfig = params.clientParams ?? getDefaultConfig();

  return ai.defineRetriever(
    {
      name: `neo4j/${indexId}`,
      configSchema: Neo4jRetrieverOptionsSchema,
    },
    async (content, options) => {
      const queryEmbeddings = await ai.embed({
        embedder,
        content,
        options: embedderOptions,
      });

      // Query text is kept verbatim.
      const retrieverQuery = `
CALL db.index.vector.queryNodes($index, $k, $embedding) YIELD node, score
RETURN node.text AS text, node {.*, text: Null,
embedding: Null, id: Null } AS metadata
`;

      // A driver is opened per invocation and always closed afterwards.
      // Sharing one driver across calls and closing it after the first
      // retrieval would leave every later retrieval with a closed driver.
      const driver = neo4j_driver.driver(
        neo4jConfig.url, // URL (protocol://host:port)
        neo4j_driver.auth.basic(neo4jConfig.username, neo4jConfig.password), // Authentication
      );
      try {
        const response = await driver.executeQuery(
          retrieverQuery,
          {
            k: options.k,
            embedding: queryEmbeddings[0].embedding,
            index: indexId,
          },
          {
            database: neo4jConfig.database,
          },
        );

        const documents = response.records.map((record) =>
          Document.fromText(
            record.get("text"),
            // Properties projected as Null in the query are dropped here.
            Object.fromEntries(
              Object.entries(record.get("metadata")).filter(
                ([_, value]) => value !== null,
              ),
            ),
          ),
        );
        return { documents };
      } finally {
        // Release connections even when the query or mapping throws.
        await driver.close();
      }
    },
  );
}
/**
 * Configures a Neo4j indexer and registers it as `neo4j/<indexId>`.
 *
 * On each indexing call every document is embedded, then the documents are
 * written in batches of 1000: one node labelled `indexId` per document, with
 * `text` set from the first content part, the document metadata copied onto
 * the node, and the embedding stored through
 * `db.create.setNodeVectorProperty`. Finally a vector index on `embedding`
 * is created for that label if it does not exist yet.
 *
 * @param ai A Genkit instance.
 * @param params The params for the indexer.
 * @param params.indexId The name of the indexer; also used as the node label
 *   and the vector index name.
 * @param params.clientParams Neo4j connection settings (url, username,
 *   password, optional database). When omitted, the NEO4J_URI,
 *   NEO4J_USERNAME and NEO4J_PASSWORD environment variables are used.
 * @param params.embedder The embedder to use for the indexer.
 * @param params.embedderOptions Options to customize the embedder.
 * @returns A Genkit indexer.
 * @throws Error at configuration time when `clientParams` is omitted and the
 *   required environment variables are missing.
 */
export function configureNeo4jIndexer<
  EmbedderCustomOptions extends z.ZodTypeAny,
>(
  ai: Genkit,
  params: {
    indexId: string;
    clientParams?: Neo4jGraphConfig;
    embedder: EmbedderArgument<EmbedderCustomOptions>;
    embedderOptions?: z.infer<EmbedderCustomOptions>;
  },
) {
  const { indexId, embedder, embedderOptions } = params;
  // Resolved eagerly so missing connection settings are reported when the
  // indexer is configured rather than on first use.
  const neo4jConfig = params.clientParams ?? getDefaultConfig();

  return ai.defineIndexer(
    {
      name: `neo4j/${indexId}`,
      //configSchema: neo4jIndexerOptionsSchema.optional(),
    },
    async (docs) => {
      const embeddings = await Promise.all(
        docs.map((doc) =>
          ai.embed({
            embedder,
            content: doc,
            options: embedderOptions,
          }),
        ),
      );

      // A driver is opened per invocation and always closed afterwards.
      // Sharing one driver across calls and closing it after the first
      // indexing run would leave every later run with a closed driver.
      const driver = neo4j_driver.driver(
        neo4jConfig.url, // URL (protocol://host:port)
        neo4j_driver.auth.basic(neo4jConfig.username, neo4jConfig.password), // Authentication
      );
      try {
        const BATCH_SIZE = 1000;
        for (let i = 0; i < docs.length; i += BATCH_SIZE) {
          const batchDocs = docs.slice(i, i + BATCH_SIZE);
          const batchEmbeddings = embeddings.slice(i, i + BATCH_SIZE);
          // Only the first content part and the first embedding of each
          // document are stored.
          const batchParams = batchDocs.map((el, j) => ({
            text: el.content[0]["text"],
            metadata: el.metadata ?? {},
            embedding: batchEmbeddings[j][0]["embedding"],
          }));
          await driver.executeQuery(
            `
UNWIND $data AS row
CREATE (t:\`${indexId}\`)
SET t.text = row.text,
t += row.metadata
WITH t, row.embedding AS embedding
CALL db.create.setNodeVectorProperty(t, 'embedding', embedding)
`,
            { data: batchParams },
            { database: neo4jConfig.database },
          );
        }

        await driver.executeQuery(
          `
CREATE VECTOR INDEX $indexName IF NOT EXISTS
FOR (n:\`${indexId}\`) ON n.embedding
`,
          { indexName: indexId },
          { database: neo4jConfig.database },
        );
      } finally {
        // Release connections even when a write or the index creation throws.
        await driver.close();
      }
    },
  );
}
/**
 * Reads Neo4j connection settings from the process environment.
 *
 * NEO4J_URI, NEO4J_USERNAME and NEO4J_PASSWORD must all be set to non-empty
 * values. NEO4J_DATABASE is optional and is included in the result only when
 * it is set to a non-empty value.
 *
 * @returns An object with `url`, `username`, `password` and, when provided,
 *   `database`.
 * @throws Error when any of the three required variables is missing or empty.
 */
function getDefaultConfig() {
  const env = process.env;
  const url = env["NEO4J_URI"];
  const username = env["NEO4J_USERNAME"];
  const password = env["NEO4J_PASSWORD"];
  const database = env["NEO4J_DATABASE"];

  if (!(url && username && password)) {
    throw new Error(
      "Please provide Neo4j connection details through environment variables: NEO4J_URI, NEO4J_USERNAME, and NEO4J_PASSWORD are required.\n" +
        "For more details see https://neo4j.com/docs/api/javascript-driver/current/",
    );
  }

  const config: {
    url: string;
    username: string;
    password: string;
    database?: string;
  } = { url, username, password };
  // The key is left off entirely when no database name was supplied.
  if (database) {
    config.database = database;
  }
  return config;
}