@langchain/community
Version:
Third-party integrations for LangChain.js
439 lines (438 loc) • 18.2 kB
JavaScript
Object.defineProperty(exports, Symbol.toStringTag, { value: "Module" });
const require_runtime = require("../_virtual/_rolldown/runtime.cjs");
let neo4j_driver = require("neo4j-driver");
neo4j_driver = require_runtime.__toESM(neo4j_driver);
let _langchain_core_utils_hash = require("@langchain/core/utils/hash");
//#region src/graphs/neo4j_graph.ts
var neo4j_graph_exports = /* @__PURE__ */ require_runtime.__exportAll({
BASE_ENTITY_LABEL: () => BASE_ENTITY_LABEL,
Neo4jGraph: () => Neo4jGraph
});
const BASE_ENTITY_LABEL = "__Entity__";
const DISTINCT_VALUE_LIMIT = 10;
const LIST_LIMIT = 128;
const EXHAUSTIVE_SEARCH_LIMIT = 1e4;
const EXCLUDED_LABELS = ["Bloom_Perspective", "Bloom_Scene"];
const EXCLUDED_RELS = ["Bloom_HAS_SCENE"];
const INCLUDE_DOCS_QUERY = `
MERGE (d:Document {id:$document.metadata.id})
SET d.text = $document.pageContent
SET d += $document.metadata
WITH d
`;
const NODE_PROPERTIES_QUERY = `
CALL apoc.meta.data()
YIELD label, other, elementType, type, property
WHERE NOT type = "RELATIONSHIP" AND elementType = "node"
AND NOT label IN $EXCLUDED_LABELS
WITH label AS nodeLabels, collect({property:property, type:type}) AS properties
RETURN {labels: nodeLabels, properties: properties} AS output
`;
const REL_PROPERTIES_QUERY = `
CALL apoc.meta.data()
YIELD label, other, elementType, type, property
WHERE NOT type = "RELATIONSHIP" AND elementType = "relationship"
AND NOT label in $EXCLUDED_LABELS
WITH label AS nodeLabels, collect({property:property, type:type}) AS properties
RETURN {type: nodeLabels, properties: properties} AS output
`;
const REL_QUERY = `
CALL apoc.meta.data()
YIELD label, other, elementType, type, property
WHERE type = "RELATIONSHIP" AND elementType = "node"
UNWIND other AS other_node
WITH * WHERE NOT label IN $EXCLUDED_LABELS
AND NOT other_node IN $EXCLUDED_LABELS
RETURN {start: label, type: property, end: toString(other_node)} AS output
`;
function isDistinctMoreThanLimit(distinct_count = 11, limit = DISTINCT_VALUE_LIMIT) {
return distinct_count !== void 0 && distinct_count > limit;
}
function cleanStringValues(text) {
return text.replaceAll("\n", " ").replaceAll("\r", " ");
}
function formatSchema(schema, isEnhanced) {
let formattedNodeProps = [];
let formattedRelProps = [];
if (isEnhanced) {
for (const [nodeType, properties] of Object.entries(schema.nodeProps)) {
formattedNodeProps.push(`- **${nodeType}**`);
for (const prop of properties) {
let example = "";
if (prop.type === "STRING") {
if (prop.values.length > 0) if (isDistinctMoreThanLimit(prop.distinct_count)) example = `Example: ${cleanStringValues(prop.values[0])}`;
else example = `Available options: ${prop.values.map(cleanStringValues).join(", ")}`;
} else if ([
"INTEGER",
"FLOAT",
"DATE",
"DATE_TIME",
"LOCAL_DATE_TIME"
].includes(prop.type)) {
if (prop.min !== void 0) example = `Min: ${prop.min}, Max: ${prop.max}`;
else if (prop.values.length > 0) example = `Example: ${prop.values[0]}`;
} else if (prop.type === "LIST") {
if (!prop.min_size || prop.min_size > LIST_LIMIT) continue;
example = `Min Size: ${prop.min_size}, Max Size: ${prop.max_size}`;
}
formattedNodeProps.push(` - \`${prop.property}\`: ${prop.type} ${example}`);
}
}
for (const [relType, properties] of Object.entries(schema.relProps)) {
formattedRelProps.push(`- **${relType}**`);
for (const prop of properties) {
let example = "";
if (prop.type === "STRING") {
if (prop.values.length > 0) if (isDistinctMoreThanLimit(prop.distinct_count)) example = `Example: ${cleanStringValues(prop.values[0])}`;
else example = `Available options: ${prop.values.map(cleanStringValues).join(", ")}`;
} else if ([
"INTEGER",
"FLOAT",
"DATE",
"DATE_TIME",
"LOCAL_DATE_TIME"
].includes(prop.type)) {
if (prop.min) example = `Min: ${prop.min}, Max: ${prop.max}`;
else if (prop.values) example = `Example: ${prop.values[0]}`;
} else if (prop.type === "LIST") {
if (prop.min_size > LIST_LIMIT) continue;
example = `Min Size: ${prop.min_size}, Max Size: ${prop.max_size}`;
}
formattedRelProps.push(` - \`${prop.property}\`: ${prop.type} ${example}`);
}
}
} else {
formattedNodeProps = Object.entries(schema.nodeProps).map(([key, value]) => {
return `${key} {${value.map((prop) => `${prop.property}: ${prop.type}`).join(", ")}}`;
});
formattedRelProps = Object.entries(schema.relProps).map(([key, value]) => {
return `${key} {${value.map((prop) => `${prop.property}: ${prop.type} `).join(", ")} } `;
});
}
const formattedRels = schema.relationships.map((el) => `(: ${el.start}) - [: ${el.type}] -> (:${el.end})`);
return [
"Node properties are the following:",
formattedNodeProps?.join(", "),
"Relationship properties are the following:",
formattedRelProps?.join(", "),
"The relationships are the following:",
formattedRels?.join(", ")
].join("\n");
}
/**
* @security *Security note*: Make sure that the database connection uses credentials
* that are narrowly-scoped to only include necessary permissions.
* Failure to do so may result in data corruption or loss, since the calling
* code may attempt commands that would result in deletion, mutation
* of data if appropriately prompted or reading sensitive data if such
* data is present in the database.
* The best way to guard against such negative outcomes is to (as appropriate)
* limit the permissions granted to the credentials used with this tool.
* For example, creating read only users for the database is a good way to
* ensure that the calling code cannot mutate or delete data.
*
* @link See https://js.langchain.com/docs/security for more information.
*/
var Neo4jGraph = class Neo4jGraph {
driver;
database;
timeoutMs;
enhancedSchema;
schema = "";
structuredSchema = {
nodeProps: {},
relProps: {},
relationships: [],
metadata: {
constraint: {},
index: {}
}
};
constructor({ url, username, password, database = "neo4j", timeoutMs, enhancedSchema = false }) {
try {
this.driver = neo4j_driver.default.driver(url, neo4j_driver.default.auth.basic(username, password));
this.database = database;
this.timeoutMs = timeoutMs;
this.enhancedSchema = enhancedSchema;
} catch {
throw new Error("Could not create a Neo4j driver instance. Please check the connection details.");
}
}
static async initialize(config) {
const graph = new Neo4jGraph(config);
await graph.verifyConnectivity();
try {
await graph.refreshSchema();
} catch (error) {
if (error.code === "Neo.ClientError.Procedure.ProcedureNotFound") throw new Error("Could not use APOC procedures. Please ensure the APOC plugin is installed in Neo4j and that 'apoc.meta.data()' is allowed in Neo4j configuration.");
throw error;
} finally {
console.log("Schema refreshed successfully.");
}
return graph;
}
getSchema() {
return this.schema;
}
getStructuredSchema() {
return this.structuredSchema;
}
async query(query, params = {}, routing = neo4j_driver.default.routing.WRITE) {
return toObjects((await this.driver.executeQuery(query, params, {
database: this.database,
routing,
transactionConfig: { timeout: this.timeoutMs }
})).records);
}
async verifyConnectivity() {
await this.driver.getServerInfo();
}
async refreshSchema() {
const nodeProperties = (await this.query(NODE_PROPERTIES_QUERY, { EXCLUDED_LABELS: EXCLUDED_LABELS.concat([BASE_ENTITY_LABEL]) }))?.map((el) => el.output);
const relationshipsProperties = (await this.query(REL_PROPERTIES_QUERY, { EXCLUDED_LABELS: EXCLUDED_RELS }))?.map((el) => el.output);
const relationships = (await this.query(REL_QUERY, { EXCLUDED_LABELS: EXCLUDED_LABELS.concat([BASE_ENTITY_LABEL]) }))?.map((el) => el.output);
const constraint = await this.query("SHOW CONSTRAINTS");
const index = await this.query("SHOW INDEXES YIELD *");
this.structuredSchema = {
nodeProps: Object.fromEntries(nodeProperties?.map((el) => [el.labels, el.properties]) || []),
relProps: Object.fromEntries(relationshipsProperties?.map((el) => [el.type, el.properties]) || []),
relationships: relationships || [],
metadata: {
constraint,
index
}
};
if (this.enhancedSchema) {
const schemaCounts = await this.query("CALL apoc.meta.graphSample() YIELD nodes, relationships RETURN nodes, [rel in relationships | {name: apoc.any.property(rel, 'type'), count: apoc.any.property(rel, 'count')}] AS relationships");
for (const node of schemaCounts[0].nodes) {
if (EXCLUDED_LABELS.includes(node.name)) continue;
const nodeProps = this.structuredSchema.nodeProps[node.name];
if (!nodeProps) continue;
const enhancedCypher = await this.enhancedSchemaCypher(node.name, nodeProps, node.count < EXHAUSTIVE_SEARCH_LIMIT);
const enhancedInfo = (await this.query(enhancedCypher))[0].output;
for (const prop of nodeProps) if (enhancedInfo[prop.property]) Object.assign(prop, enhancedInfo[prop.property]);
}
for (const rel of schemaCounts[0].relationships) {
if (EXCLUDED_RELS.includes(rel.name)) continue;
const relProps = this.structuredSchema.relProps[rel.name];
if (!relProps) continue;
const enhancedCypher = await this.enhancedSchemaCypher(rel.name, relProps, rel.count < EXHAUSTIVE_SEARCH_LIMIT, true);
const enhancedInfo = (await this.query(enhancedCypher))[0].output;
for (const prop of relProps) if (prop.property in enhancedInfo) Object.assign(prop, enhancedInfo[prop.property]);
}
}
this.schema = formatSchema(this.structuredSchema, this.enhancedSchema);
}
async enhancedSchemaCypher(labelOrType, properties, exhaustive, isRelationship = false) {
let matchClause = isRelationship ? `MATCH ()-[n:\`${labelOrType}\`]->()` : `MATCH (n:\`${labelOrType}\`)`;
const withClauses = [];
const returnClauses = [];
const outputDict = {};
if (exhaustive) for (const prop of properties) {
const propName = prop.property;
const propType = prop.type;
if (propType === "STRING") {
withClauses.push(`collect(distinct substring(n.\`${propName}\`, 0, 50)) AS \`${propName}_values\``);
returnClauses.push(`values: \`${propName}_values\`[..${DISTINCT_VALUE_LIMIT}], distinct_count: size(\`${propName}_values\`)`);
} else if ([
"INTEGER",
"FLOAT",
"DATE",
"DATE_TIME",
"LOCAL_DATE_TIME"
].includes(propType)) {
withClauses.push(`min(n.\`${propName}\`) AS \`${propName}_min\``);
withClauses.push(`max(n.\`${propName}\`) AS \`${propName}_max\``);
withClauses.push(`count(distinct n.\`${propName}\`) AS \`${propName}_distinct\``);
returnClauses.push(`min: toString(\`${propName}_min\`), max: toString(\`${propName}_max\`), distinct_count: \`${propName}_distinct\``);
} else if (propType === "LIST") {
withClauses.push(`min(size(n.\`${propName}\`)) AS \`${propName}_size_min\`, max(size(n.\`${propName}\`)) AS \`${propName}_size_max\``);
returnClauses.push(`min_size: \`${propName}_size_min\`, max_size: \`${propName}_size_max\``);
} else if ([
"BOOLEAN",
"POINT",
"DURATION"
].includes(propType)) continue;
outputDict[propName] = `{${returnClauses.pop()}}`;
}
else {
matchClause += ` WITH n LIMIT 5`;
for (const prop of properties) {
const propName = prop.property;
const propType = prop.type;
const propIndex = this.structuredSchema?.metadata?.index.filter((el) => el.label === labelOrType && el.properties[0] === propName && el.type === "RANGE");
if (propType === "STRING") if (propIndex.length > 0 && propIndex[0].size > 0 && propIndex[0].distinctValues <= DISTINCT_VALUE_LIMIT) {
const distinctValues = (await this.query(`CALL apoc.schema.properties.distinct('${labelOrType}', '${propName}') YIELD value`))[0].value;
returnClauses.push(`values: ${distinctValues}, distinct_count: ${distinctValues.length}`);
} else {
withClauses.push(`collect(distinct substring(n.\`${propName}\`, 0, 50)) AS \`${propName}_values\``);
returnClauses.push(`values: ${propName}_values`);
}
else if ([
"INTEGER",
"FLOAT",
"DATE",
"DATE_TIME",
"LOCAL_DATE_TIME"
].includes(propType)) if (!propIndex) {
withClauses.push(`collect(distinct toString(n.\`${propName}\`)) AS \`${propName}_values\``);
returnClauses.push(`values: ${propName}_values`);
} else {
withClauses.push(`min(n.\`${propName}\`) AS \`${propName}_min\``);
withClauses.push(`max(n.\`${propName}\`) AS \`${propName}_max\``);
withClauses.push(`count(distinct n.\`${propName}\`) AS \`${propName}_distinct\``);
returnClauses.push(`min: toString(\`${propName}_min\`), max: toString(\`${propName}_max\`), distinct_count: \`${propName}_distinct\``);
}
else if (propType === "LIST") {
withClauses.push(`min(size(n.\`${propName}\`)) AS \`${propName}_size_min\`, max(size(n.\`${propName}\`)) AS \`${propName}_size_max\``);
returnClauses.push(`min_size: \`${propName}_size_min\`, max_size: \`${propName}_size_max\``);
} else if ([
"BOOLEAN",
"POINT",
"DURATION"
].includes(propType)) continue;
outputDict[propName] = `{${returnClauses.pop()}}`;
}
}
const withClause = `WITH ${withClauses.join(", ")}`;
const returnClause = `RETURN {${Object.entries(outputDict).map(([k, v]) => `\`${k}\`: ${v}`).join(", ")}} AS output`;
return [
matchClause,
withClause,
returnClause
].join("\n");
}
async addGraphDocuments(graphDocuments, config = {}) {
const { baseEntityLabel } = config;
if (baseEntityLabel) {
if (!(this.structuredSchema?.metadata?.constraint?.some((el) => JSON.stringify(el.labelsOrTypes) === JSON.stringify(["__Entity__"]) && JSON.stringify(el.properties) === JSON.stringify(["id"])) ?? false)) {
await this.query(`
CREATE CONSTRAINT IF NOT EXISTS FOR (b:${BASE_ENTITY_LABEL})
REQUIRE b.id IS UNIQUE;
`);
await this.refreshSchema();
}
}
const nodeImportQuery = getNodeImportQuery(config);
const relImportQuery = getRelImportQuery(config);
for (const document of graphDocuments) {
if (!document.source.metadata.id) document.source.metadata.id = (0, _langchain_core_utils_hash.sha256)(document.source.pageContent);
await this.query(nodeImportQuery, {
data: document.nodes.map((el) => ({ ...el })),
document: { ...document.source }
});
await this.query(relImportQuery, { data: document.relationships.map((el) => ({
source: el.source.id,
source_label: el.source.type,
target: el.target.id,
target_label: el.target.type,
type: el.type.replace(/ /g, "_").toUpperCase(),
properties: el.properties
})) });
}
}
async close() {
await this.driver.close();
}
};
function getNodeImportQuery({ baseEntityLabel, includeSource }) {
if (baseEntityLabel) return `
${includeSource ? INCLUDE_DOCS_QUERY : ""}
UNWIND $data AS row
MERGE(source: \`${BASE_ENTITY_LABEL}\` {id: row.id})
SET source += row.properties
${includeSource ? "MERGE (d)-[:MENTIONS]->(source)" : ""}
WITH source, row
CALL apoc.create.addLabels(source, [row.type]) YIELD node
RETURN distinct 'done' AS result
`;
else return `
${includeSource ? INCLUDE_DOCS_QUERY : ""}
UNWIND $data AS row
CALL apoc.merge.node([row.type], {id: row.id},
row.properties, {}) YIELD node
${includeSource ? "MERGE (d)-[:MENTIONS]->(node)" : ""}
RETURN distinct 'done' AS result
`;
}
function getRelImportQuery({ baseEntityLabel }) {
if (baseEntityLabel) return `
UNWIND $data AS row
MERGE (source:\`${BASE_ENTITY_LABEL}\` {id: row.source})
MERGE (target:\`${BASE_ENTITY_LABEL}\` {id: row.target})
WITH source, target, row
CALL apoc.merge.relationship(source, row.type,
{}, row.properties, target) YIELD rel
RETURN distinct 'done'
`;
else return `
UNWIND $data AS row
CALL apoc.merge.node([row.source_label], {id: row.source},
{}, {}) YIELD node as source
CALL apoc.merge.node([row.target_label], {id: row.target},
{}, {}) YIELD node as target
CALL apoc.merge.relationship(source, row.type,
{}, row.properties, target) YIELD rel
RETURN distinct 'done'
`;
}
function toObjects(records) {
return records.map((record) => {
const rObj = record.toObject();
const out = {};
Object.keys(rObj).forEach((key) => {
out[key] = itemIntToString(rObj[key]);
});
return out;
});
}
function itemIntToString(item) {
if (neo4j_driver.default.isInt(item)) return item.toString();
if (Array.isArray(item)) return item.map((ii) => itemIntToString(ii));
if ([
"number",
"string",
"boolean"
].indexOf(typeof item) !== -1) return item;
if (item === null) return item;
if (typeof item === "object") return objIntToString(item);
}
function objIntToString(obj) {
const entry = extractFromNeoObjects(obj);
let newObj = null;
if (Array.isArray(entry)) newObj = entry.map((item) => itemIntToString(item));
else if (entry !== null && typeof entry === "object") {
newObj = {};
Object.keys(entry).forEach((key) => {
newObj[key] = itemIntToString(entry[key]);
});
}
return newObj;
}
function extractFromNeoObjects(obj) {
if (obj instanceof neo4j_driver.default.types.Node || obj instanceof neo4j_driver.default.types.Relationship) return obj.properties;
else if (obj instanceof neo4j_driver.default.types.Path) return [].concat.apply([], extractPathForRows(obj));
return obj;
}
const extractPathForRows = (path) => {
let { segments } = path;
if (!Array.isArray(path.segments) || path.segments.length < 1) segments = [{
...path,
end: null
}];
return segments.map((segment) => [
objIntToString(segment.start),
objIntToString(segment.relationship),
objIntToString(segment.end)
].filter((part) => part !== null));
};
//#endregion
exports.BASE_ENTITY_LABEL = BASE_ENTITY_LABEL;
exports.Neo4jGraph = Neo4jGraph;
Object.defineProperty(exports, "neo4j_graph_exports", {
enumerable: true,
get: function() {
return neo4j_graph_exports;
}
});
//# sourceMappingURL=neo4j_graph.cjs.map