UNPKG

@codai/memorai-core

Version:

Simplified advanced memory engine - no tiers, just powerful semantic search with persistence

395 lines (390 loc) 16.7 kB
/**
 * Relationship Extraction Engine
 * Automatically identifies and extracts relationships between memory entities
 */
import OpenAI from 'openai';

/** Milliseconds in one hour, used to express timestamp gaps in hours. */
const MS_PER_HOUR = 1000 * 60 * 60;

/** Number of memories sent to the model per chat-completion request. */
const AI_BATCH_SIZE = 5;

/**
 * Relationship types offered to the model in the prompt. Replies that use any
 * other type are discarded by `parseAIRelationships`.
 */
const AI_RELATIONSHIP_TYPES = new Set([
  'causal',
  'temporal',
  'semantic',
  'hierarchical',
  'contradictory',
  'supportive',
  'contextual',
  'sequential',
  'associative',
  'comparative',
]);

/** Memory type pairs treated as related (in either order) by `areTypesRelated`. */
const RELATED_TYPE_PAIRS = [
  ['task', 'procedure'],
  ['fact', 'preference'],
  ['personality', 'emotion'],
  ['thread', 'task'],
];

/**
 * Word separator: any run of characters that are not letters, combining marks,
 * digits or underscore. Matches `\W+` for ASCII text, but keeps accented and
 * non-Latin words in one piece.
 */
const WORD_SEPARATOR = /[^\p{L}\p{M}\p{N}_]+/u;

/**
 * Convert a Date, date string or epoch number to epoch milliseconds.
 * @param {*} value
 * @returns {number} Epoch milliseconds, or NaN when the value is not a usable date.
 */
function toTimestamp(value) {
  if (value instanceof Date) {
    return value.getTime();
  }
  if (typeof value === 'string' || typeof value === 'number') {
    return new Date(value).getTime();
  }
  return Number.NaN;
}

/**
 * Coerce a number (or numeric string) into the range [0, 1].
 * @param {*} value
 * @returns {number} The clamped number, or NaN when the value is not numeric.
 */
function toUnitInterval(value) {
  const numeric = typeof value === 'string' ? Number.parseFloat(value) : value;
  if (typeof numeric !== 'number' || !Number.isFinite(numeric)) {
    return Number.NaN;
  }
  return Math.min(1, Math.max(0, numeric));
}

/**
 * Distinct tags of a memory; a missing or non-array `tags` field yields an empty set.
 * @param {{tags?: string[]}} memory
 * @returns {Set<string>}
 */
function tagSetOf(memory) {
  return new Set(Array.isArray(memory.tags) ? memory.tags : []);
}

/**
 * Lower-case a text and return its words longer than three characters.
 * @param {string} text
 * @returns {string[]}
 */
function tokenize(text) {
  return text
    .toLowerCase()
    .split(WORD_SEPARATOR)
    .filter(word => word.length > 3);
}

/**
 * Build a key that is identical for (a, b) and (b, a). JSON encoding keeps the
 * parts separated even when an ID itself contains the delimiter character.
 * @param {*} idA
 * @param {*} idB
 * @param {...*} extra Additional key parts (e.g. the relationship type).
 * @returns {string}
 */
function unorderedPairKey(idA, idB, ...extra) {
  const [low, high] = [String(idA), String(idB)].sort();
  return JSON.stringify([low, high, ...extra]);
}

export class RelationshipExtractor {
  /**
   * @param {object} [config] Overrides for the defaults below. Properties set
   *   to `undefined` are ignored so they cannot erase a default.
   * @param {string} [config.openaiApiKey] When present, an OpenAI client is
   *   created and AI-based extraction is enabled.
   */
  constructor(config = {}) {
    const overrides = Object.fromEntries(
      Object.entries(config ?? {}).filter(([, value]) => value !== undefined),
    );
    this.config = {
      model: 'gpt-4o-mini',
      minConfidence: 0.6,
      maxRelationshipsPerMemory: 10,
      enableSemanticAnalysis: true,
      enableTemporalAnalysis: true,
      semanticThreshold: 0.7,
      ...overrides,
    };
    if (this.config.openaiApiKey) {
      this.openai = new OpenAI({
        apiKey: this.config.openaiApiKey,
      });
    }
  }

  /**
   * Extract relationships from a set of memories.
   *
   * Runs every enabled extractor, removes duplicates, drops results below
   * `config.minConfidence` and sorts the rest by descending confidence.
   *
   * @param {object[]} memories
   * @returns {Promise<object[]>} Empty when fewer than two memories are given,
   *   since a relationship needs two distinct memories.
   */
  async extractRelationships(memories) {
    if (!memories || memories.length < 2) {
      return [];
    }
    const relationships = [];
    if (this.config.enableSemanticAnalysis) {
      relationships.push(...(await this.extractSemanticRelationships(memories)));
    }
    if (this.config.enableTemporalAnalysis) {
      relationships.push(...(await this.extractTemporalRelationships(memories)));
    }
    // AI-powered relationship extraction only when a client was configured.
    if (this.openai) {
      relationships.push(...(await this.extractAIRelationships(memories)));
    }
    return this.deduplicateRelationships(relationships)
      .filter(rel => rel.confidence >= this.config.minConfidence)
      .sort((a, b) => b.confidence - a.confidence);
  }

  /**
   * Extract relationships from shared tags, keyword overlap and equal types.
   *
   * Every unordered pair of memories is examined once. For each source memory
   * at most `config.maxRelationshipsPerMemory` relationships are kept, choosing
   * the most confident ones. (The previous check multiplied the limit by the
   * loop index, so it stopped the whole scan after the first memory as soon as
   * that memory had produced a single relationship.)
   *
   * @param {object[]} memories
   * @returns {Promise<object[]>}
   */
  async extractSemanticRelationships(memories) {
    const relationships = [];
    const configuredCap = this.config.maxRelationshipsPerMemory;
    const perMemoryCap =
      Number.isFinite(configuredCap) && configuredCap >= 0
        ? Math.floor(configuredCap)
        : Infinity;
    const tagSets = memories.map(memory => tagSetOf(memory));

    for (let i = 0; i < memories.length; i++) {
      const memA = memories[i];
      const candidates = [];

      for (let j = i + 1; j < memories.length; j++) {
        const memB = memories[j];
        // The same memory may appear twice in the input; never link it to itself.
        if (memA.id === memB.id) {
          continue;
        }

        // Tag-based relationships
        const sharedTags = [...tagSets[i]].filter(tag => tagSets[j].has(tag));
        if (sharedTags.length > 0) {
          const strength =
            sharedTags.length / Math.max(tagSets[i].size, tagSets[j].size);
          const confidence = Math.min(0.9, strength + 0.2);
          if (confidence >= this.config.semanticThreshold) {
            candidates.push({
              id: `semantic_${memA.id}_${memB.id}`,
              sourceMemoryId: memA.id,
              targetMemoryId: memB.id,
              relationshipType: 'semantic',
              strength,
              confidence,
              description: `Related through shared tags: ${sharedTags.join(', ')}`,
              extractedAt: new Date(),
              metadata: {
                keywords: sharedTags,
                evidence: [`Shared tags: ${sharedTags.join(', ')}`],
              },
            });
          }
        }

        // Content similarity (basic keyword matching)
        const contentSimilarity = this.calculateContentSimilarity(
          memA.content,
          memB.content,
        );
        if (contentSimilarity > this.config.semanticThreshold) {
          candidates.push({
            id: `content_${memA.id}_${memB.id}`,
            sourceMemoryId: memA.id,
            targetMemoryId: memB.id,
            relationshipType: 'semantic',
            strength: contentSimilarity,
            confidence: contentSimilarity,
            description: `Similar content patterns detected`,
            extractedAt: new Date(),
            metadata: {
              evidence: [
                `Content similarity score: ${contentSimilarity.toFixed(2)}`,
              ],
            },
          });
        }

        // Type-based relationships; two memories that both lack a type are not "the same type".
        if (memA.type != null && memA.type === memB.type) {
          candidates.push({
            id: `type_${memA.id}_${memB.id}`,
            sourceMemoryId: memA.id,
            targetMemoryId: memB.id,
            relationshipType: 'associative',
            strength: 0.6,
            confidence: 0.7,
            description: `Both memories are of type: ${memA.type}`,
            extractedAt: new Date(),
            metadata: {
              keywords: [memA.type],
              evidence: [`Same memory type: ${memA.type}`],
            },
          });
        }
      }

      // Array#sort is stable, so equally confident candidates keep discovery order.
      candidates.sort((a, b) => b.confidence - a.confidence);
      relationships.push(...candidates.slice(0, perMemoryCap));
    }
    return relationships;
  }

  /**
   * Extract relationships between memories that are adjacent in creation order.
   *
   * `createdAt` may be a Date, a date string or an epoch number; memories
   * without a usable timestamp are left out of the timeline instead of
   * throwing. Adjacent memories created within 4 hours get a `temporal` link;
   * those from the same agent within 1 hour and with related types also get a
   * `sequential` link.
   *
   * @param {object[]} memories
   * @returns {Promise<object[]>}
   */
  async extractTemporalRelationships(memories) {
    const relationships = [];
    const timeline = memories
      .map(memory => ({ memory, timestamp: toTimestamp(memory.createdAt) }))
      .filter(entry => Number.isFinite(entry.timestamp))
      .sort((a, b) => a.timestamp - b.timestamp);

    for (let i = 0; i < timeline.length - 1; i++) {
      const { memory: currentMemory, timestamp: currentTime } = timeline[i];
      const { memory: nextMemory, timestamp: nextTime } = timeline[i + 1];
      // Consistent with the semantic extractor: never link a memory to itself.
      if (currentMemory.id === nextMemory.id) {
        continue;
      }
      const hoursDiff = (nextTime - currentTime) / MS_PER_HOUR;

      // Close temporal proximity (within 4 hours)
      if (hoursDiff <= 4) {
        relationships.push({
          id: `temporal_${currentMemory.id}_${nextMemory.id}`,
          sourceMemoryId: currentMemory.id,
          targetMemoryId: nextMemory.id,
          relationshipType: 'temporal',
          strength: 1 - hoursDiff / 4, // Closer = stronger
          confidence: 0.8,
          description: `Created ${hoursDiff.toFixed(1)} hours apart`,
          extractedAt: new Date(),
          metadata: {
            context: `Temporal sequence with ${hoursDiff.toFixed(1)} hour gap`,
            evidence: [
              `Created at ${new Date(currentTime).toISOString()}`,
              `Followed by ${new Date(nextTime).toISOString()}`,
            ],
          },
        });
      }

      // Sequential patterns (same agent, close timing, related types)
      if (
        currentMemory.agent_id === nextMemory.agent_id &&
        currentMemory.agent_id &&
        hoursDiff <= 1 &&
        this.areTypesRelated(currentMemory.type, nextMemory.type)
      ) {
        relationships.push({
          id: `sequential_${currentMemory.id}_${nextMemory.id}`,
          sourceMemoryId: currentMemory.id,
          targetMemoryId: nextMemory.id,
          relationshipType: 'sequential',
          strength: 0.9,
          confidence: 0.85,
          description: `Sequential workflow step for agent ${currentMemory.agent_id}`,
          extractedAt: new Date(),
          metadata: {
            context: `Agent workflow sequence`,
            keywords: [currentMemory.agent_id],
            evidence: [
              `Same agent: ${currentMemory.agent_id}`,
              `Close timing: ${hoursDiff.toFixed(1)} hours`,
            ],
          },
        });
      }
    }
    return relationships;
  }

  /**
   * Extract relationships using AI analysis.
   *
   * Memories are sent in batches to keep each request small. A failing batch
   * is logged and skipped so the remaining batches are still processed.
   * Batches with fewer than two memories are skipped without a request because
   * they cannot contain a relationship.
   *
   * @param {object[]} memories
   * @returns {Promise<object[]>} Empty when no OpenAI client is configured.
   */
  async extractAIRelationships(memories) {
    if (!this.openai) {
      return [];
    }
    const relationships = [];
    for (let start = 0; start < memories.length; start += AI_BATCH_SIZE) {
      const batch = memories.slice(start, start + AI_BATCH_SIZE);
      if (batch.length < 2) {
        continue;
      }
      try {
        const response = await this.openai.chat.completions.create({
          model: this.config.model,
          messages: [
            {
              role: 'system',
              content:
                'You are an expert at analyzing relationships between pieces of information. Identify meaningful relationships between memory entries.',
            },
            {
              role: 'user',
              content: this.buildRelationshipPrompt(batch),
            },
          ],
          temperature: 0.3,
          max_tokens: 1500,
        });
        const aiAnalysis = response.choices[0]?.message?.content;
        if (aiAnalysis) {
          relationships.push(...this.parseAIRelationships(aiAnalysis, batch));
        }
      } catch (error) {
        // Best effort: AI extraction supplements the other extractors.
        console.warn('AI relationship extraction failed:', error);
      }
    }
    return relationships;
  }

  /**
   * Build the user prompt for AI relationship extraction.
   *
   * The allowed types and the confidence threshold come from the same values
   * that `parseAIRelationships` later enforces, so prompt and parser agree.
   *
   * @param {object[]} memories
   * @returns {string}
   */
  buildRelationshipPrompt(memories) {
    const memoryDescriptions = memories
      .map((memory, index) => {
        const createdAt = toTimestamp(memory.createdAt);
        const created = Number.isFinite(createdAt)
          ? new Date(createdAt).toISOString()
          : 'unknown';
        const tags = Array.isArray(memory.tags) ? memory.tags.join(', ') : '';
        return (
          `Memory ${index + 1} (ID: ${memory.id}):\n` +
          `Type: ${memory.type}\n` +
          `Content: ${memory.content}\n` +
          `Tags: ${tags}\n` +
          `Created: ${created}\n` +
          `Importance: ${memory.importance}\n`
        );
      })
      .join('\n---\n');

    return [
      'Analyze the following memories and identify meaningful relationships between them:',
      '',
      memoryDescriptions,
      '',
      'For each relationship you identify, provide:',
      '1. Source memory ID and target memory ID',
      `2. Relationship type (${[...AI_RELATIONSHIP_TYPES].join(', ')})`,
      '3. Strength (0.0-1.0)',
      '4. Confidence (0.0-1.0)',
      '5. Brief description',
      '',
      'Format your response as JSON array with this structure:',
      '[',
      '  {',
      '    "sourceId": "memory_id",',
      '    "targetId": "memory_id",',
      '    "type": "relationship_type",',
      '    "strength": 0.8,',
      '    "confidence": 0.9,',
      '    "description": "Brief explanation"',
      '  }',
      ']',
      '',
      `Only include relationships with confidence >= ${this.config.minConfidence}.`,
    ].join('\n');
  }

  /**
   * Parse an AI response into relationships.
   *
   * Model output is untrusted, so each entry is validated on its own and
   * skipped when it is not an object, references an ID outside `memories`,
   * links a memory to itself, uses an unknown type, or has a non-numeric or
   * too-low confidence. Strength and confidence are clamped to [0, 1].
   *
   * @param {string} aiResponse Raw message content returned by the model.
   * @param {object[]} [memories] The batch the response refers to. When it is
   *   not an array, IDs are accepted as returned.
   * @returns {object[]} Empty when no JSON array can be read from the response.
   */
  parseAIRelationships(aiResponse, memories) {
    const relationships = [];
    // Maps the textual ID back to the original value so its type is preserved.
    const knownIds = Array.isArray(memories)
      ? new Map(memories.map(memory => [String(memory.id), memory.id]))
      : null;
    const resolveId = rawId => {
      if (rawId == null) {
        return undefined;
      }
      return knownIds ? knownIds.get(String(rawId)) : rawId;
    };

    try {
      // Take everything from the first '[' to the last ']' so surrounding
      // prose or code fences around the JSON are ignored.
      const jsonMatch = aiResponse.match(/\[[\s\S]*\]/);
      if (!jsonMatch) {
        return relationships;
      }
      const parsedRelationships = JSON.parse(jsonMatch[0]);

      for (const rel of parsedRelationships) {
        if (rel === null || typeof rel !== 'object') {
          continue;
        }
        const sourceId = resolveId(rel.sourceId);
        const targetId = resolveId(rel.targetId);
        if (sourceId === undefined || targetId === undefined || sourceId === targetId) {
          continue;
        }
        const type = typeof rel.type === 'string' ? rel.type.trim().toLowerCase() : '';
        if (!AI_RELATIONSHIP_TYPES.has(type)) {
          continue;
        }
        const confidence = toUnitInterval(rel.confidence);
        if (Number.isNaN(confidence) || confidence < this.config.minConfidence) {
          continue;
        }
        const strength = toUnitInterval(rel.strength);

        relationships.push({
          id: `ai_${sourceId}_${targetId}`,
          sourceMemoryId: sourceId,
          targetMemoryId: targetId,
          relationshipType: type,
          // A missing or unusable strength falls back to the confidence value.
          strength: Number.isNaN(strength) ? confidence : strength,
          confidence,
          description:
            typeof rel.description === 'string' && rel.description.length > 0
              ? rel.description
              : `AI-identified ${type} relationship`,
          extractedAt: new Date(),
          metadata: {
            evidence: ['AI-identified relationship'],
            context: 'Generated by AI analysis',
          },
        });
      }
    } catch (error) {
      console.warn('Failed to parse AI relationships:', error);
    }
    return relationships;
  }

  /**
   * Calculate basic content similarity using keyword overlap.
   *
   * Counts the words of `content1` (longer than three characters) that also
   * occur in `content2` and divides by the larger of the two word counts.
   * Repeated words are counted each time, so the score is not symmetric.
   *
   * @param {string} content1
   * @param {string} content2
   * @returns {number} Score in [0, 1]; 0 when either input is not a string or
   *   contains no qualifying words.
   */
  calculateContentSimilarity(content1, content2) {
    if (typeof content1 !== 'string' || typeof content2 !== 'string') {
      return 0;
    }
    const words1 = tokenize(content1);
    const words2 = tokenize(content2);
    if (words1.length === 0 || words2.length === 0) {
      return 0;
    }
    const vocabulary2 = new Set(words2);
    const commonCount = words1.filter(word => vocabulary2.has(word)).length;
    return commonCount / Math.max(words1.length, words2.length);
  }

  /**
   * Check if two memory types are related.
   * @param {string} type1
   * @param {string} type2
   * @returns {boolean} True when the two types form one of the known pairs, in either order.
   */
  areTypesRelated(type1, type2) {
    return RELATED_TYPE_PAIRS.some(
      ([a, b]) => (type1 === a && type2 === b) || (type1 === b && type2 === a),
    );
  }

  /**
   * Remove duplicate relationships.
   *
   * Two relationships are duplicates when they connect the same two memories
   * (in either direction) with the same type. The most confident duplicate is
   * kept, so a weak result found first cannot hide a strong one found later.
   * Results stay in order of first appearance.
   *
   * @param {object[]} relationships
   * @returns {object[]}
   */
  deduplicateRelationships(relationships) {
    const strongestByKey = new Map();
    for (const rel of relationships) {
      const key = unorderedPairKey(
        rel.sourceMemoryId,
        rel.targetMemoryId,
        rel.relationshipType,
      );
      const kept = strongestByKey.get(key);
      // Map#set on an existing key keeps its original insertion position.
      if (kept === undefined || rel.confidence > kept.confidence) {
        strongestByKey.set(key, rel);
      }
    }
    return [...strongestByKey.values()];
  }

  /**
   * Get relationships for a specific memory.
   * @param {*} memoryId
   * @param {object[]} allRelationships
   * @returns {Promise<{outgoing: object[], incoming: object[], total: number}>}
   *   `outgoing` has the memory as source, `incoming` has it as target.
   */
  async getRelationshipsForMemory(memoryId, allRelationships) {
    const outgoing = [];
    const incoming = [];
    for (const rel of allRelationships) {
      if (rel.sourceMemoryId === memoryId) {
        outgoing.push(rel);
      }
      if (rel.targetMemoryId === memoryId) {
        incoming.push(rel);
      }
    }
    return {
      outgoing,
      incoming,
      total: outgoing.length + incoming.length,
    };
  }

  /**
   * Analyze relationship network metrics.
   *
   * @param {object[]} relationships
   * @returns {Promise<object>} Object with:
   *   - `totalRelationships`: number of relationships given;
   *   - `averageRelationshipsPerMemory`: relationships divided by distinct memories;
   *   - `strongestRelationshipType`: the most frequent type ('semantic' when empty);
   *   - `networkDensity`: distinct connected memory pairs divided by all
   *     possible pairs, so it stays within [0, 1] even when one pair carries
   *     several relationship types;
   *   - `centralMemories`: up to five memory IDs with the most connections.
   */
  async analyzeNetworkMetrics(relationships) {
    const typeCounts = new Map();
    const connectionCounts = new Map();
    const connectedPairs = new Set();

    for (const rel of relationships) {
      typeCounts.set(
        rel.relationshipType,
        (typeCounts.get(rel.relationshipType) || 0) + 1,
      );
      connectionCounts.set(
        rel.sourceMemoryId,
        (connectionCounts.get(rel.sourceMemoryId) || 0) + 1,
      );
      connectionCounts.set(
        rel.targetMemoryId,
        (connectionCounts.get(rel.targetMemoryId) || 0) + 1,
      );
      // A self-link is not a connection between two memories.
      if (rel.sourceMemoryId !== rel.targetMemoryId) {
        connectedPairs.add(unorderedPairKey(rel.sourceMemoryId, rel.targetMemoryId));
      }
    }

    // Every memory that appears as source or target has an entry in connectionCounts.
    const totalMemories = connectionCounts.size;
    const totalRelationships = relationships.length;
    const averageRelationshipsPerMemory =
      totalMemories > 0 ? totalRelationships / totalMemories : 0;

    const possibleConnections = (totalMemories * (totalMemories - 1)) / 2;
    const networkDensity =
      possibleConnections > 0 ? connectedPairs.size / possibleConnections : 0;

    const strongestRelationshipType =
      [...typeCounts.entries()].sort(([, a], [, b]) => b - a)[0]?.[0] || 'semantic';

    const centralMemories = [...connectionCounts.entries()]
      .sort(([, a], [, b]) => b - a)
      .slice(0, 5)
      .map(([memoryId]) => memoryId);

    return {
      totalRelationships,
      averageRelationshipsPerMemory,
      strongestRelationshipType,
      networkDensity,
      centralMemories,
    };
  }
}