UNPKG

personal-data-wallet-sdk

Version:

TypeScript SDK for Personal Data Wallet - Decentralized memory system with AI embeddings, HNSW vector search, SEAL encryption and Walrus storage

712 lines (616 loc) 21.5 kB
/** * MemoryAnalyticsService - Memory Analytics & Insights * * Provides comprehensive analytics and insights for memory data including: * - Usage pattern analysis * - Similarity clustering * - Trend analysis and forecasting * - Knowledge discovery * - Content sentiment analysis * - Recommendation engines */ import { UnifiedMemoryResult } from './MemoryRetrievalService'; import { KnowledgeGraphManager } from '../graph/KnowledgeGraphManager'; import { VectorManager } from '../vector/VectorManager'; export interface MemoryAnalytics { userId: string; periodStart: Date; periodEnd: Date; // Basic statistics totalMemories: number; totalSize: number; averageImportance: number; // Usage patterns usagePatterns: UsagePattern[]; // Content analysis topCategories: Array<{ category: string; count: number; percentage: number }>; topTags: Array<{ tag: string; count: number; coOccurrences: string[] }>; contentDistribution: { textContent: number; multimedia: number; documents: number; other: number; }; // Temporal analysis temporalTrends: { creationTrend: TrendAnalysis; accessTrend: TrendAnalysis; sizeTrend: TrendAnalysis; }; // Similarity and clustering similarityClusters: SimilarityCluster[]; // Knowledge insights knowledgeInsights: MemoryInsights; // Performance metrics retrievalPerformance: { averageRetrievalTime: number; cacheHitRate: number; popularMemories: string[]; }; } export interface UsagePattern { type: 'creation' | 'access' | 'modification' | 'sharing'; pattern: 'daily' | 'weekly' | 'monthly' | 'seasonal' | 'irregular'; peakTimes: Array<{ period: string; intensity: number }>; frequency: number; trend: 'increasing' | 'decreasing' | 'stable'; anomalies: Array<{ date: Date; type: 'spike' | 'drop' | 'outlier'; severity: number; possibleCauses?: string[]; }>; } export interface SimilarityCluster { id: string; label: string; memories: string[]; centroid: number[]; coherence: number; size: number; characteristics: { dominantTopics: string[]; averageImportance: number; timeSpread: { start: Date; end: Date }; commonTags: string[]; }; relationships: Array<{ clusterId: string; similarity: number; sharedMemories: number; }>; } export interface TrendAnalysis { direction: 'up' | 'down' | 'stable' | 'volatile'; strength: number; // 0-1 seasonality: { detected: boolean; period?: 'daily' | 'weekly' | 'monthly' | 'yearly'; amplitude?: number; }; forecast: Array<{ date: Date; predicted: number; confidence: number; upper: number; lower: number; }>; changePoints: Array<{ date: Date; significance: number; description: string; }>; } export interface MemoryInsights { knowledgeDomains: Array<{ domain: string; expertise: number; memories: string[]; keyEntities: string[]; growthRate: number; }>; learningProgression: Array<{ topic: string; startDate: Date; currentLevel: number; progressRate: number; milestones: Array<{ date: Date; achievement: string; memoryId: string; }>; }>; conceptualConnections: Array<{ concept1: string; concept2: string; connectionStrength: number; bridgeMemories: string[]; evolutionOverTime: Array<{ date: Date; strength: number; }>; }>; contentQuality: { averageSentiment: number; clarityScore: number; completenessScore: number; originalityScore: number; }; recommendations: Array<{ type: 'explore' | 'review' | 'connect' | 'expand'; priority: number; title: string; description: string; memoryIds: string[]; reasoning: string; }>; } /** * Memory Analytics Service */ export class MemoryAnalyticsService { private graphManager: KnowledgeGraphManager; private vectorManager: VectorManager; // Analytics cache private analyticsCache = new Map<string, { analytics: MemoryAnalytics; timestamp: number }>(); private readonly ANALYTICS_CACHE_TTL = 60 * 60 * 1000; // 1 hour constructor(config?: { graphManager?: KnowledgeGraphManager; vectorManager?: VectorManager; }) { this.graphManager = config?.graphManager ?? new KnowledgeGraphManager(); // Use placeholder vector manager until proper service injection is implemented this.vectorManager = config?.vectorManager ?? ({} as any); } // ==================== MAIN ANALYTICS METHODS ==================== /** * Generate comprehensive analytics for a user's memories */ async generateMemoryAnalytics( userId: string, memories: UnifiedMemoryResult[], options?: { includeForecasting?: boolean; includeClustering?: boolean; includeInsights?: boolean; periodStart?: Date; periodEnd?: Date; } ): Promise<MemoryAnalytics> { const cacheKey = `analytics:${userId}:${JSON.stringify(options)}`; // Check cache const cached = this.getCachedAnalytics(cacheKey); if (cached) return cached; const startDate = options?.periodStart || new Date(Date.now() - 90 * 24 * 60 * 60 * 1000); // 90 days ago const endDate = options?.periodEnd || new Date(); // Filter memories by period const periodMemories = memories.filter(m => m.created >= startDate && m.created <= endDate ); try { // Basic statistics const basicStats = this.calculateBasicStats(periodMemories); // Usage patterns const usagePatterns = await this.analyzeUsagePatterns(periodMemories, userId); // Content analysis const contentAnalysis = this.analyzeContent(periodMemories); // Temporal trends const temporalTrends = await this.analyzeTempralTrends( periodMemories, options?.includeForecasting ?? true ); // Similarity clustering let similarityClusters: SimilarityCluster[] = []; if (options?.includeClustering !== false) { similarityClusters = await this.performSimilarityClustering(periodMemories); } // Knowledge insights let knowledgeInsights: MemoryInsights = { knowledgeDomains: [], learningProgression: [], conceptualConnections: [], contentQuality: { averageSentiment: 0.5, clarityScore: 0.7, completenessScore: 0.8, originalityScore: 0.6 }, recommendations: [] }; if (options?.includeInsights !== false) { knowledgeInsights = await this.generateKnowledgeInsights(periodMemories, userId); } // Performance metrics const performanceMetrics = this.calculatePerformanceMetrics(periodMemories); const analytics: MemoryAnalytics = { userId, periodStart: startDate, periodEnd: endDate, ...basicStats, usagePatterns, ...contentAnalysis, temporalTrends, similarityClusters, knowledgeInsights, retrievalPerformance: performanceMetrics }; // Cache results this.cacheAnalytics(cacheKey, analytics); return analytics; } catch (error) { throw new Error(`Analytics generation failed: ${error instanceof Error ? error.message : 'Unknown error'}`); } } /** * Analyze usage patterns over time */ async analyzeUsagePatterns( memories: UnifiedMemoryResult[], userId: string ): Promise<UsagePattern[]> { const patterns: UsagePattern[] = []; // Creation pattern const creationPattern = this.analyzeCreationPattern(memories); patterns.push(creationPattern); // Access pattern (if analytics data is available) const accessPattern = this.analyzeAccessPattern(memories); patterns.push(accessPattern); return patterns; } /** * Perform similarity-based clustering */ async performSimilarityClustering( memories: UnifiedMemoryResult[], options?: { numberOfClusters?: number; similarityThreshold?: number; algorithm?: 'kmeans' | 'hierarchical' | 'dbscan'; } ): Promise<SimilarityCluster[]> { const clusters: SimilarityCluster[] = []; // Extract embeddings from memories (if available) const embeddings = await this.extractEmbeddings(memories); // Perform clustering const clusterAssignments = await this.performClustering( embeddings, options?.algorithm || 'kmeans', options?.numberOfClusters || Math.min(10, Math.floor(memories.length / 5)) ); // Build cluster objects for (let i = 0; i < clusterAssignments.length; i++) { const clusterMemories = memories.filter((_, idx) => clusterAssignments[idx] === i); if (clusterMemories.length > 0) { const cluster = await this.buildCluster(clusterMemories, i); clusters.push(cluster); } } // Calculate inter-cluster relationships this.calculateClusterRelationships(clusters); return clusters; } /** * Generate knowledge insights from memories */ async generateKnowledgeInsights( memories: UnifiedMemoryResult[], userId: string ): Promise<MemoryInsights> { // Analyze knowledge domains const knowledgeDomains = await this.identifyKnowledgeDomains(memories); // Track learning progression const learningProgression = await this.analyzeLearningProgression(memories); // Find conceptual connections const conceptualConnections = await this.findConceptualConnections(memories, userId); // Analyze content quality const contentQuality = await this.analyzeContentQuality(memories); // Generate recommendations const recommendations = await this.generateRecommendations( memories, knowledgeDomains, learningProgression, conceptualConnections ); return { knowledgeDomains, learningProgression, conceptualConnections, contentQuality, recommendations }; } // ==================== HELPER METHODS ==================== private calculateBasicStats(memories: UnifiedMemoryResult[]): { totalMemories: number; totalSize: number; averageImportance: number; } { const totalMemories = memories.length; const totalSize = memories.reduce((sum, m) => sum + m.metadata.size, 0); const averageImportance = memories.reduce((sum, m) => sum + m.metadata.importance, 0) / totalMemories || 0; return { totalMemories, totalSize, averageImportance }; } private analyzeContent(memories: UnifiedMemoryResult[]): { topCategories: Array<{ category: string; count: number; percentage: number }>; topTags: Array<{ tag: string; count: number; coOccurrences: string[] }>; contentDistribution: { textContent: number; multimedia: number; documents: number; other: number; }; } { // Category analysis const categoryMap = new Map<string, number>(); memories.forEach(m => { categoryMap.set(m.category, (categoryMap.get(m.category) || 0) + 1); }); const topCategories = Array.from(categoryMap.entries()) .map(([category, count]) => ({ category, count, percentage: (count / memories.length) * 100 })) .sort((a, b) => b.count - a.count) .slice(0, 10); // Tag analysis const tagMap = new Map<string, { count: number; coOccurrences: Set<string> }>(); memories.forEach(m => { m.metadata.tags.forEach(tag => { if (!tagMap.has(tag)) { tagMap.set(tag, { count: 0, coOccurrences: new Set() }); } tagMap.get(tag)!.count++; // Track co-occurrences m.metadata.tags.forEach(otherTag => { if (otherTag !== tag) { tagMap.get(tag)!.coOccurrences.add(otherTag); } }); }); }); const topTags = Array.from(tagMap.entries()) .map(([tag, data]) => ({ tag, count: data.count, coOccurrences: Array.from(data.coOccurrences).slice(0, 5) })) .sort((a, b) => b.count - a.count) .slice(0, 20); // Content type distribution const contentDistribution = { textContent: memories.filter(m => m.metadata.contentType === 'text').length, multimedia: memories.filter(m => ['image', 'video', 'audio'].includes(m.metadata.contentType)).length, documents: memories.filter(m => ['pdf', 'doc', 'docx'].includes(m.metadata.contentType)).length, other: 0 }; contentDistribution.other = memories.length - contentDistribution.textContent - contentDistribution.multimedia - contentDistribution.documents; return { topCategories, topTags, contentDistribution }; } private analyzeCreationPattern(memories: UnifiedMemoryResult[]): UsagePattern { // Analyze when memories are created const hourlyDistribution = new Array(24).fill(0); const dailyDistribution = new Array(7).fill(0); memories.forEach(memory => { const hour = memory.created.getHours(); const day = memory.created.getDay(); hourlyDistribution[hour]++; dailyDistribution[day]++; }); // Find peak times const peakHours = hourlyDistribution .map((count, hour) => ({ period: `${hour}:00`, intensity: count })) .sort((a, b) => b.intensity - a.intensity) .slice(0, 3); // Determine pattern type const pattern = this.determinePattern(memories.map(m => m.created)); return { type: 'creation', pattern, peakTimes: peakHours, frequency: memories.length, trend: 'stable', // Would need time series analysis anomalies: [] // Would detect using statistical methods }; } private analyzeAccessPattern(memories: UnifiedMemoryResult[]): UsagePattern { // Placeholder - would analyze access patterns if data available return { type: 'access', pattern: 'irregular', peakTimes: [], frequency: 0, trend: 'stable', anomalies: [] }; } private determinePattern(dates: Date[]): UsagePattern['pattern'] { // Simple pattern detection - could be enhanced with statistical analysis if (dates.length < 7) return 'irregular'; // Check for daily pattern (consistent creation times) const hours = dates.map(d => d.getHours()); const hourVariance = this.calculateVariance(hours); if (hourVariance < 4) return 'daily'; // Check for weekly pattern const days = dates.map(d => d.getDay()); const dayVariance = this.calculateVariance(days); if (dayVariance < 2) return 'weekly'; return 'irregular'; } private calculateVariance(numbers: number[]): number { const mean = numbers.reduce((a, b) => a + b, 0) / numbers.length; const variance = numbers.reduce((sum, num) => sum + Math.pow(num - mean, 2), 0) / numbers.length; return variance; } // Placeholder implementations for complex analytics methods private async analyzeTempralTrends( memories: UnifiedMemoryResult[], includeForecasting: boolean ): Promise<MemoryAnalytics['temporalTrends']> { return { creationTrend: { direction: 'stable', strength: 0.5, seasonality: { detected: false }, forecast: [], changePoints: [] }, accessTrend: { direction: 'stable', strength: 0.5, seasonality: { detected: false }, forecast: [], changePoints: [] }, sizeTrend: { direction: 'up', strength: 0.3, seasonality: { detected: false }, forecast: [], changePoints: [] } }; } private calculatePerformanceMetrics(memories: UnifiedMemoryResult[]): MemoryAnalytics['retrievalPerformance'] { return { averageRetrievalTime: 150, // ms cacheHitRate: 0.85, popularMemories: memories .sort((a, b) => (b.analytics?.viewCount || 0) - (a.analytics?.viewCount || 0)) .slice(0, 10) .map(m => m.id) }; } private async extractEmbeddings(memories: UnifiedMemoryResult[]): Promise<number[][]> { // Extract or generate embeddings for clustering return memories.map(() => new Array(384).fill(0).map(() => Math.random())); } private async performClustering( embeddings: number[][], algorithm: string, clusters: number ): Promise<number[]> { // Perform clustering algorithm - simplified k-means return embeddings.map((_, i) => i % clusters); } private async buildCluster(memories: UnifiedMemoryResult[], clusterId: number): Promise<SimilarityCluster> { const centroid = new Array(384).fill(0).map(() => Math.random()); return { id: `cluster_${clusterId}`, label: `Memory Cluster ${clusterId + 1}`, memories: memories.map(m => m.id), centroid, coherence: 0.75, size: memories.length, characteristics: { dominantTopics: memories.slice(0, 3).map(m => m.category), averageImportance: memories.reduce((sum, m) => sum + m.metadata.importance, 0) / memories.length, timeSpread: { start: new Date(Math.min(...memories.map(m => m.created.getTime()))), end: new Date(Math.max(...memories.map(m => m.created.getTime()))) }, commonTags: this.findCommonTags(memories) }, relationships: [] }; } private findCommonTags(memories: UnifiedMemoryResult[]): string[] { const tagCounts = new Map<string, number>(); memories.forEach(m => { m.metadata.tags.forEach(tag => { tagCounts.set(tag, (tagCounts.get(tag) || 0) + 1); }); }); return Array.from(tagCounts.entries()) .filter(([_, count]) => count >= memories.length * 0.3) // At least 30% of memories .map(([tag]) => tag) .slice(0, 5); } private calculateClusterRelationships(clusters: SimilarityCluster[]): void { clusters.forEach(cluster => { cluster.relationships = clusters .filter(other => other.id !== cluster.id) .map(other => ({ clusterId: other.id, similarity: this.calculateClusterSimilarity(cluster, other), sharedMemories: this.countSharedMemories(cluster, other) })) .sort((a, b) => b.similarity - a.similarity) .slice(0, 3); }); } private calculateClusterSimilarity(cluster1: SimilarityCluster, cluster2: SimilarityCluster): number { // Calculate cosine similarity between centroids return Math.random() * 0.5 + 0.25; // Placeholder } private countSharedMemories(cluster1: SimilarityCluster, cluster2: SimilarityCluster): number { return cluster1.memories.filter(id => cluster2.memories.includes(id)).length; } // Knowledge analysis placeholders private async identifyKnowledgeDomains(memories: UnifiedMemoryResult[]): Promise<MemoryInsights['knowledgeDomains']> { return []; } private async analyzeLearningProgression(memories: UnifiedMemoryResult[]): Promise<MemoryInsights['learningProgression']> { return []; } private async findConceptualConnections( memories: UnifiedMemoryResult[], userId: string ): Promise<MemoryInsights['conceptualConnections']> { return []; } private async analyzeContentQuality(memories: UnifiedMemoryResult[]): Promise<MemoryInsights['contentQuality']> { return { averageSentiment: 0.65, clarityScore: 0.75, completenessScore: 0.80, originalityScore: 0.70 }; } private async generateRecommendations( memories: UnifiedMemoryResult[], domains: MemoryInsights['knowledgeDomains'], progression: MemoryInsights['learningProgression'], connections: MemoryInsights['conceptualConnections'] ): Promise<MemoryInsights['recommendations']> { return []; } // ==================== CACHE MANAGEMENT ==================== private getCachedAnalytics(key: string): MemoryAnalytics | null { const cached = this.analyticsCache.get(key); if (cached && Date.now() - cached.timestamp < this.ANALYTICS_CACHE_TTL) { return cached.analytics; } this.analyticsCache.delete(key); return null; } private cacheAnalytics(key: string, analytics: MemoryAnalytics): void { this.analyticsCache.set(key, { analytics, timestamp: Date.now() }); } /** * Clear analytics cache */ clearCache(): void { this.analyticsCache.clear(); } /** * Get cache statistics */ getCacheStats(): { entries: number; totalSize: number } { return { entries: this.analyticsCache.size, totalSize: JSON.stringify(Array.from(this.analyticsCache.values())).length }; } }