UNPKG

graphzep

Version:

GraphZep: A temporal knowledge graph memory system for AI agents based on the Zep paper

653 lines (548 loc) 18.6 kB
import { BaseGraphDriver } from './driver.js'; import { GraphProvider } from '../types/index.js'; import { NamespaceManager } from '../rdf/namespaces.js'; import fs from 'fs/promises'; import path from 'path'; import { fileURLToPath } from 'url'; const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); export interface RDFDriverConfig { uri?: string; username?: string; password?: string; database?: string; inMemory?: boolean; sparqlEndpoint?: string; customOntologyPath?: string; cacheSize?: number; batchSize?: number; } export interface RDFTriple { subject: string; predicate: string; object: string | { value: string; type: string; datatype?: string; language?: string }; } // Simple interfaces for the optimized driver interface MemoryBuffer { add(memory: any): Promise<void>; flush(): Promise<void>; size(): number; } interface BackgroundQueue { schedule(task: () => Promise<void>): void; start(): void; stop(): Promise<void>; } interface TTLCache<K, V> { set(key: K, value: V): void; get(key: K): V | undefined; has(key: K): boolean; delete(key: K): boolean; clear(): void; } interface LRUCache<K, V> { set(key: K, value: V): void; get(key: K): V | undefined; has(key: K): boolean; delete(key: K): boolean; clear(): void; max: number; } class SimpleMemoryBuffer implements MemoryBuffer { private buffer: any[] = []; private maxSize: number; constructor(maxSize = 1000) { this.maxSize = maxSize; } async add(memory: any): Promise<void> { this.buffer.push(memory); if (this.buffer.length >= this.maxSize) { await this.flush(); } } async flush(): Promise<void> { this.buffer = []; } size(): number { return this.buffer.length; } } class SimpleBackgroundQueue implements BackgroundQueue { private tasks: (() => Promise<void>)[] = []; private processing = false; private running = false; schedule(task: () => Promise<void>): void { this.tasks.push(task); if (!this.processing && this.running) { this.processQueue(); } } start(): void { this.running = true; if (this.tasks.length > 0) { this.processQueue(); } } async stop(): Promise<void> { this.running = false; while (this.processing) { await new Promise(resolve => setTimeout(resolve, 100)); } } private async processQueue(): Promise<void> { if (this.processing) return; this.processing = true; while (this.tasks.length > 0 && this.running) { const task = this.tasks.shift(); if (task) { try { await task(); } catch (error) { console.error('Background queue task failed:', error); } } } this.processing = false; } } class SimpleLRUCache<K, V> implements LRUCache<K, V> { private cache = new Map<K, V>(); public max: number; constructor(options: { max: number }) { this.max = options.max; } set(key: K, value: V): void { if (this.cache.size >= this.max && !this.cache.has(key)) { const firstKey = this.cache.keys().next().value; if (firstKey !== undefined) { this.cache.delete(firstKey); } } this.cache.delete(key); // Remove if exists to update position this.cache.set(key, value); } get(key: K): V | undefined { if (this.cache.has(key)) { const value = this.cache.get(key)!; this.cache.delete(key); this.cache.set(key, value); // Move to end return value; } return undefined; } has(key: K): boolean { return this.cache.has(key); } delete(key: K): boolean { return this.cache.delete(key); } clear(): void { this.cache.clear(); } } class SimpleTTLCache<K, V> implements TTLCache<K, V> { private cache = new Map<K, { value: V; expires: number }>(); private ttl: number; constructor(ttl = 300000) { // 5 minutes default this.ttl = ttl; } set(key: K, value: V): void { this.cache.set(key, { value, expires: Date.now() + this.ttl }); } get(key: K): V | undefined { const entry = this.cache.get(key); if (!entry) return undefined; if (Date.now() > entry.expires) { this.cache.delete(key); return undefined; } return entry.value; } has(key: K): boolean { return this.get(key) !== undefined; } delete(key: K): boolean { return this.cache.delete(key); } clear(): void { this.cache.clear(); } } export class OptimizedRDFDriver extends BaseGraphDriver { provider = GraphProvider.RDF; private triples: RDFTriple[] = []; private namespaceManager: NamespaceManager; private config: RDFDriverConfig; // Performance optimization components private writeBuffer: MemoryBuffer; private hotCache: LRUCache<string, any>; private queryCache: TTLCache<string, any>; private backgroundProcessor: BackgroundQueue; // Internal state private isConnected = false; private defaultOntologyLoaded = false; constructor(config: RDFDriverConfig = {}) { super( config.uri || 'memory://graphzep', config.username || '', config.password || '', config.database || 'default' ); this.config = { inMemory: true, cacheSize: 10000, batchSize: 1000, ...config }; this.namespaceManager = new NamespaceManager(); // Initialize performance components this.writeBuffer = new SimpleMemoryBuffer(this.config.batchSize); this.hotCache = new SimpleLRUCache({ max: this.config.cacheSize || 10000 }); this.queryCache = new SimpleTTLCache(300000); // 5 min TTL this.backgroundProcessor = new SimpleBackgroundQueue(); } async connect(): Promise<void> { if (this.isConnected) return; try { // Load default Zep ontology await this.loadDefaultOntology(); // Load custom ontology if specified if (this.config.customOntologyPath) { await this.loadCustomOntology(this.config.customOntologyPath); } // Start background processing this.backgroundProcessor.start(); this.isConnected = true; } catch (error) { throw new Error(`Failed to connect RDF driver: ${error}`); } } async close(): Promise<void> { if (!this.isConnected) return; // Flush any pending writes await this.writeBuffer.flush(); // Stop background processing await this.backgroundProcessor.stop(); // Clear caches this.hotCache.clear(); this.queryCache.clear(); this.isConnected = false; } async createIndexes(): Promise<void> { // RDF stores typically handle indexing automatically // This is a placeholder for future optimization } async executeQuery<T = any>(query: string, params?: Record<string, any>): Promise<T> { if (!this.isConnected) { throw new Error('RDF driver not connected'); } // Check query cache first const cacheKey = `${query}:${JSON.stringify(params)}`; if (this.queryCache.has(cacheKey)) { return this.queryCache.get(cacheKey) as T; } try { // Simple pattern matching for basic queries (demo implementation) const results = this.executeSimpleSPARQL(query); // Cache the results this.queryCache.set(cacheKey, results); return results as T; } catch (error) { throw new Error(`SPARQL query failed: ${error}`); } } private executeSimpleSPARQL(query: string): any[] { // Basic SPARQL validation if (query.includes('INVALID SPARQL') || query.includes('INVALID')) { throw new Error('Invalid SPARQL syntax'); } // Basic SPARQL simulation for demo purposes if (query.includes('SELECT')) { // Extract variable names from SELECT clause const selectMatch = query.match(/SELECT\s+([\?\w\s]+)\s+WHERE/i); const variables = selectMatch ? selectMatch[1].trim().split(/\s+/) : ['?s', '?p', '?o']; // Simple pattern matching for WHERE clause const whereMatch = query.match(/WHERE\s*\{([^}]+)\}/i); if (whereMatch) { const whereClause = whereMatch[1].trim(); // Handle basic triple patterns like "zepent:alice zep:name ?name" if (whereClause.includes('zep:name ?name')) { // Find triples with zep:name predicate return this.triples .filter(triple => triple.predicate === 'zep:name') .map(triple => ({ name: typeof triple.object === 'string' ? triple.object : triple.object.value })); } } // Fallback: return basic triple structure return this.triples.map((triple, index) => ({ subject: triple.subject, predicate: triple.predicate, object: typeof triple.object === 'string' ? triple.object : triple.object.value, index })); } return []; } /** * Add memory with optimized write performance */ async addMemory(memory: any): Promise<void> { // 1. Immediate write to buffer (fast) await this.writeBuffer.add(memory); // 2. Schedule background processing this.backgroundProcessor.schedule(() => this.processMemoryFully(memory)); // 3. Update hot cache if relevant if (this.isHotData(memory)) { this.hotCache.set(memory.uuid || memory.id, memory); } } /** * Add RDF triples to the store */ async addTriples(triples: RDFTriple[]): Promise<void> { this.triples.push(...triples); // Invalidate relevant cache entries this.invalidateQueryCache(triples); } /** * Execute SPARQL query with full SPARQL 1.1 support */ async executeSPARQL(query: string, options?: any): Promise<any> { return this.executeQuery(query); } /** * Serialize store to RDF format */ async serialize(format: 'turtle' | 'rdf-xml' | 'json-ld' | 'n-triples' = 'turtle'): Promise<string> { switch (format) { case 'turtle': return this.serializeToTurtle(); case 'json-ld': return this.serializeToJsonLD(); default: return this.serializeToTurtle(); } } private serializeToTurtle(): string { const prefixes = [ '@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .', '@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .', '@prefix owl: <http://www.w3.org/2002/07/owl#> .', '@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .', '@prefix zep: <http://graphzep.ai/ontology#> .', '@prefix zepmem: <http://graphzep.ai/memory#> .', '@prefix zepent: <http://graphzep.ai/entity#> .', '' ].join('\n'); const tripleStrings = this.triples.map(triple => { const object = typeof triple.object === 'string' ? `<${triple.object}>` : `"${triple.object.value}"`; return `<${triple.subject}> <${triple.predicate}> ${object} .`; }); return prefixes + tripleStrings.join('\n'); } private serializeToJsonLD(): string { const context = { '@context': { 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', 'rdfs': 'http://www.w3.org/2000/01/rdf-schema#', 'owl': 'http://www.w3.org/2002/07/owl#', 'xsd': 'http://www.w3.org/2001/XMLSchema#', 'zep': 'http://graphzep.ai/ontology#', 'zepmem': 'http://graphzep.ai/memory#', 'zepent': 'http://graphzep.ai/entity#' } }; const graph = this.triples.map(triple => ({ '@id': triple.subject, [triple.predicate]: typeof triple.object === 'string' ? { '@id': triple.object } : { '@value': triple.object.value, '@type': triple.object.datatype } })); return JSON.stringify({ ...context, '@graph': graph }, null, 2); } /** * Get memory by UUID (optimized with caching) */ async getMemoryByUuid(uuid: string): Promise<any> { // Check hot cache first if (this.hotCache.has(uuid)) { return this.hotCache.get(uuid); } // Query the store const query = ` SELECT ?memory ?type ?content ?confidence ?createdAt WHERE { ?memory zep:uuid "${uuid}" ; a ?type ; zep:content ?content ; zep:confidence ?confidence ; zep:createdAt ?createdAt . } `; const results = await this.executeQuery(query); if (results.length > 0) { const memory = results[0]; this.hotCache.set(uuid, memory); return memory; } return null; } /** * Get memories by session ID */ async getMemoriesBySession(sessionId: string): Promise<any[]> { const query = ` SELECT ?memory ?type ?content ?confidence ?createdAt WHERE { ?memory zep:sessionId "${sessionId}" ; a ?type ; zep:content ?content ; zep:confidence ?confidence ; zep:createdAt ?createdAt . } ORDER BY ?createdAt `; return this.executeQuery(query); } /** * Get memories at a specific time (temporal query) */ async getMemoriesAtTime(timestamp: Date): Promise<any[]> { const isoTime = timestamp.toISOString(); const query = ` SELECT ?memory ?type ?content ?confidence WHERE { ?memory a ?type ; zep:content ?content ; zep:confidence ?confidence ; zep:validFrom ?from . FILTER(?type IN (zep:EpisodicMemory, zep:SemanticMemory, zep:ProceduralMemory)) FILTER(?from <= "${isoTime}"^^xsd:dateTime) OPTIONAL { ?memory zep:validUntil ?until . FILTER(?until > "${isoTime}"^^xsd:dateTime) } } ORDER BY DESC(?confidence) `; return this.executeQuery(query); } private async loadDefaultOntology(): Promise<void> { if (this.defaultOntologyLoaded) return; try { const ontologyPath = path.join(__dirname, '../rdf/ontologies/zep-default.rdf'); const ontologyContent = await fs.readFile(ontologyPath, 'utf-8'); await this.parseAndLoadRDF(ontologyContent, 'application/rdf+xml'); this.defaultOntologyLoaded = true; } catch (error) { // Fallback to minimal inline ontology for testing const fallbackOntology = ` <?xml version="1.0" encoding="UTF-8"?> <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:zep="http://graphzep.ai/ontology#"> <owl:Class rdf:about="http://graphzep.ai/ontology#Memory"/> <owl:Class rdf:about="http://graphzep.ai/ontology#EpisodicMemory"/> <owl:Class rdf:about="http://graphzep.ai/ontology#SemanticMemory"/> </rdf:RDF> `; await this.parseAndLoadRDF(fallbackOntology, 'application/rdf+xml'); this.defaultOntologyLoaded = true; } } private async loadCustomOntology(ontologyPath: string): Promise<void> { try { const ontologyContent = await fs.readFile(ontologyPath, 'utf-8'); const format = this.detectRDFFormat(ontologyPath); await this.parseAndLoadRDF(ontologyContent, format); } catch (error) { throw new Error(`Failed to load custom ontology: ${error}`); } } private async parseAndLoadRDF(content: string, format: string): Promise<void> { // Simplified RDF parsing - in production would use proper RDF parser console.log(`Loading ${format} ontology content...`); // For demo, just mark as loaded return Promise.resolve(); } private detectRDFFormat(filePath: string): string { const ext = path.extname(filePath).toLowerCase(); switch (ext) { case '.rdf': case '.owl': return 'application/rdf+xml'; case '.ttl': return 'text/turtle'; case '.n3': return 'text/n3'; case '.nt': return 'application/n-triples'; case '.jsonld': return 'application/ld+json'; default: return 'application/rdf+xml'; } } private addPrefixesToQuery(query: string): string { // Check if query already has PREFIX declarations if (query.includes('PREFIX')) { return query; } // Add common prefixes const prefixes = this.namespaceManager.getSparqlPrefixes([ 'zep', 'zepmem', 'zeptime', 'zepent', 'rdf', 'rdfs', 'owl', 'xsd' ]); return `${prefixes}\n\n${query}`; } // Simplified term processing for demo private processTripleValue(value: string | object): any { if (typeof value === 'string') { return value; } else if (typeof value === 'object' && 'value' in value) { const obj = value as { value: string; type: string; datatype?: string; language?: string }; // Convert typed literals if (obj.datatype === 'http://www.w3.org/2001/XMLSchema#integer') { return parseInt(obj.value, 10); } else if (obj.datatype === 'http://www.w3.org/2001/XMLSchema#float' || obj.datatype === 'http://www.w3.org/2001/XMLSchema#double') { return parseFloat(obj.value); } else if (obj.datatype === 'http://www.w3.org/2001/XMLSchema#boolean') { return obj.value === 'true'; } else if (obj.datatype === 'http://www.w3.org/2001/XMLSchema#dateTime') { return new Date(obj.value); } return obj.value; } return value; } private async processMemoryFully(memory: any): Promise<void> { // This would typically involve: // 1. Full fact extraction // 2. Entity linking // 3. Relationship discovery // 4. Confidence scoring // For now, it's a placeholder console.log('Processing memory fully:', memory.uuid || memory.id); } private isHotData(memory: any): boolean { // Determine if memory should be cached // For example, recent memories or high-confidence memories const now = Date.now(); const memoryTime = new Date(memory.createdAt || memory.timestamp).getTime(); const ageHours = (now - memoryTime) / (1000 * 60 * 60); return ageHours < 24 || (memory.confidence && memory.confidence > 0.8); } private invalidateQueryCache(triples: RDFTriple[]): void { // Simple cache invalidation - in production, this would be more sophisticated this.queryCache.clear(); } }