UNPKG

@codai/memorai-core

Version:

Simplified advanced memory engine - no tiers, just powerful semantic search with persistence

539 lines (538 loc) 21.9 kB
/**
 * Rule-based classifier that assigns a piece of memory text to one of seven
 * types: personality, procedure, preference, fact, thread, task or emotion.
 *
 * Scoring combines per-type keyword/regex groups (see the constructor) with a
 * set of additive heuristics based on length, structure and specific wording.
 */
export class MemoryClassifier {
  /**
   * Builds the keyword/pattern table. Each type maps to a list of groups;
   * a group contributes `groupScore * weight` to its type in `classify`.
   */
  constructor() {
    this.patterns = {
      personality: [
        {
          keywords: [
            'personality',
            'behavior',
            'style',
            'approach',
            'manner',
            'character',
            'trait',
          ],
          patterns: [
            /\b(personality|character|behavior|style|manner|approach|trait)\b/i,
            /\b(is|are|seems?|acts?)\s+(very\s+)?(\w+ly|\w+)\b/i,
            /\b(always|usually|tends?\s+to)\b/i,
          ],
          weight: 0.9,
        },
        {
          keywords: [
            'cheerful',
            'reliable',
            'thoughtful',
            'direct',
            'friendly',
            'professional',
          ],
          patterns: [
            /\b(cheerful|reliable|thoughtful|direct|friendly|professional|consistent)\b/i,
          ],
          weight: 0.8,
        },
      ],
      procedure: [
        {
          keywords: [
            'how',
            'step',
            'process',
            'procedure',
            'method',
            'workflow',
            'instructions',
            'algorithm',
          ],
          patterns: [
            /\bhow\s+to\b/i,
            /\bsteps?\b/i,
            // The alternation is grouped so the word boundaries apply to every
            // alternative; ungrouped, "next"/"then" would match inside words
            // such as "context" or "authentication".
            /\b(first|next|then|finally)\b/i,
            /\bprocess\s+involves\b/i,
          ],
          weight: 0.9,
        },
        {
          keywords: [
            'deploy',
            'build',
            'install',
            'setup',
            'configure',
            'run',
            'execute',
            'follow',
          ],
          patterns: [
            /\b(deploy|build|install|setup|configure|run|execute|follow)\s+\w+/i,
            /\binstructions\s+to\b/i,
          ],
          weight: 0.8,
        },
      ],
      preference: [
        {
          keywords: ['prefer', 'like', 'dislike', 'enjoy', 'avoid', 'favorite'],
          patterns: [
            /\b(prefer|like|dislike|enjoy|avoid)s?\b/i,
            /\bfavorite\s+\w+\b/i,
          ],
          weight: 0.9,
        },
        {
          keywords: ['better', 'worse', 'best', 'worst', 'choice', 'instead'],
          patterns: [
            /\b(better|worse|best|worst)\s+(than|to|for)\b/i,
            /\binstead\s+of\b/i,
            /\brather\s+have\b/i,
          ],
          weight: 0.8,
        },
      ],
      fact: [
        {
          keywords: [
            'definition',
            'means',
            'defined',
            'explanation',
            'describes',
            'information',
          ],
          patterns: [
            /\b(means?|defined?|explanation|describes?|information)\b/i,
            /\bis\s+a\s+\w+/i,
          ],
          weight: 0.9,
        },
        {
          keywords: [
            'language',
            'library',
            'framework',
            'server',
            'database',
            'api',
            'endpoint',
            'function',
            'returns',
            'boolean',
            'value',
            'application',
            'built',
            'file',
            'contains',
            'configuration',
            'settings',
            'static',
            'type',
            'checking',
          ],
          patterns: [
            /\b(language|library|framework|server|database|api|endpoint)\b/i,
            /\bruns\s+on\b/i,
            /\b(function|method)\s+(returns?|takes?|accepts?)\b/i,
            /\b(returns?|contains?|has)\s+(a\s+)?(boolean|string|number|array|object|value)\b/i,
            /\b(application|file)\s+(was\s+built|contains)\b/i,
            /\b(static|type|checking|configuration|settings)\b/i,
          ],
          weight: 0.8,
        },
      ],
      thread: [
        {
          keywords: [
            'said',
            'mentioned',
            'discussed',
            'talked',
            'conversation',
            'chat',
            'meeting',
          ],
          patterns: [
            /\b(said|mentioned|discussed|talked|conversation|chat|meeting)\b/i,
            // NOTE(review): the character class lists the apostrophe twice;
            // possibly a curly apostrophe was intended - confirm upstream.
            /\bin\s+yesterday['']?s\b/i,
          ],
          weight: 0.9,
        },
        {
          keywords: [
            'question',
            'answer',
            'asked',
            'replied',
            'response',
            'user',
          ],
          patterns: [
            /\b(question|answer|asked|replied|response)\b/i,
            /\buser\s+(said|mentioned|wants?)\b/i,
          ],
          weight: 0.8,
        },
      ],
      task: [
        {
          keywords: [
            'task',
            'todo',
            'need',
            'should',
            'must',
            'action',
            'complete',
            'finish',
          ],
          patterns: [
            /\b(need\s+to|should|must|have\s+to|task|todo)\b/i,
            /\b(complete|finish|done)\b/i,
          ],
          weight: 0.9,
        },
        {
          keywords: [
            'deadline',
            'due',
            'schedule',
            'appointment',
            'meeting',
            'urgent',
          ],
          patterns: [
            /\b(deadline|due|schedule|appointment|meeting|urgent)\b/i,
            /\bby\s+(tomorrow|friday|next\s+week)\b/i,
          ],
          weight: 0.8,
        },
      ],
      emotion: [
        {
          keywords: [
            'feel',
            'emotion',
            'happy',
            'sad',
            'angry',
            'excited',
            'worried',
            'anxious',
            'love',
            'hate',
            'enjoyed',
          ],
          patterns: [
            /\b(feel|feeling|felt|emotions?)\b/i,
            /\b(happy|sad|angry|excited|worried|anxious|frustrated|pleased|enjoyed)\b/i,
            /\b(love|hate)\s+(the|this|that)\b/i,
            /\busers?\s+(love|hate)\b/i,
          ],
          weight: 0.9,
        },
        {
          keywords: [
            'mood',
            'emotional',
            'stressed',
            'relaxed',
            'nervous',
            'confident',
          ],
          patterns: [
            /\b(mood|emotional|stressed|relaxed|nervous|confident)\b/i,
            /\bmakes?\s+me\s+(feel|happy|sad)\b/i,
          ],
          weight: 0.8,
        },
      ],
    };
  }

  /**
   * Classify a memory based on its content.
   *
   * @param {string} content - Text to classify.
   * @returns {{type: string, confidence: number, reasoning: string}} The
   *   highest-scoring type ('thread' when nothing scores above 0), a
   *   confidence between 0.4 and 1.0, and a '; '-joined explanation (or
   *   'No specific patterns detected').
   */
  classify(content) {
    const scores = {
      personality: 0,
      procedure: 0,
      preference: 0,
      fact: 0,
      thread: 0,
      task: 0,
      emotion: 0,
    };
    const reasoning = [];
    const lowerContent = content.toLowerCase();

    // Calculate scores for each type
    for (const [type, patternGroups] of Object.entries(this.patterns)) {
      let typeScore = 0;
      const typeReasoning = [];

      for (const group of patternGroups) {
        let groupScore = 0;

        // Check keywords with weighted scoring
        const keywordMatches = group.keywords.filter(keyword =>
          this.hasKeywordMatch(lowerContent, keyword));
        if (keywordMatches.length > 0) {
          // Super-linear scoring rewards multiple keyword hits in one group
          groupScore += Math.pow(keywordMatches.length, 1.5) * 0.5;
          typeReasoning.push(`contains keywords: ${keywordMatches.join(', ')}`);
        }

        // Check patterns with higher weight for exact matches
        const patternMatches = group.patterns.filter(pattern =>
          pattern.test(content));
        if (patternMatches.length > 0) {
          groupScore += patternMatches.length * 0.7;
          typeReasoning.push(`matches patterns for ${type}`);
        }

        typeScore += groupScore * group.weight;
      }

      scores[type] = typeScore;
      if (typeReasoning.length > 0) {
        reasoning.push(`${type}: ${typeReasoning.join(', ')}`);
      }
    }

    // Additional heuristics (each mutates `scores` in place)
    this.applyLengthHeuristic(content, scores);
    this.applyStructureHeuristic(content, scores);
    this.applySpecificHeuristics(content, scores, reasoning);

    // Find the highest scoring type; strict `>` keeps the earliest type on ties
    const bestType = Object.entries(scores).reduce(
      (best, [type, score]) => (score > best.score ? { type: type, score } : best),
      { type: 'thread', score: 0 },
    );

    // Confidence = base 0.4 + up to 0.4 from the normalized best score
    //              + up to 0.2 from the lead over the runner-up.
    const totalScore = Object.values(scores).reduce((sum, score) => sum + score, 0);
    const maxPossibleScore = this.calculateMaxPossibleScore(content);
    const normalizedScore = Math.min(bestType.score / Math.max(maxPossibleScore, 1), 1.0);

    let confidence = 0.4;
    if (bestType.score > 0) {
      confidence += normalizedScore * 0.4;
    }

    const secondBest = Object.values(scores).sort((a, b) => b - a)[1] || 0;
    const scoreDifference = bestType.score - secondBest;
    if (scoreDifference > 0) {
      confidence += Math.min(scoreDifference / (totalScore + 1), 0.2);
    }
    confidence = Math.min(confidence, 1.0);

    return {
      type: bestType.type,
      confidence: confidence,
      reasoning: reasoning.join('; ') || 'No specific patterns detected',
    };
  }

  /**
   * Batch classify multiple memories.
   *
   * @param {string[]} contents - Texts to classify.
   * @returns {Array<{type: string, confidence: number, reasoning: string}>}
   *   One `classify` result per input, in input order.
   */
  classifyBatch(contents) {
    return contents.map(content => this.classify(content));
  }

  /**
   * Get classification confidence threshold recommendations.
   *
   * @returns {Object<string, number>} A fresh object mapping each type to its
   *   recommended threshold.
   */
  getConfidenceThresholds() {
    return {
      personality: 0.7,
      procedure: 0.8,
      preference: 0.9,
      fact: 0.6,
      thread: 0.5,
      task: 0.8,
      emotion: 0.7,
    };
  }

  /**
   * Validate a classification result.
   *
   * @param {*} result - Candidate result object.
   * @returns {boolean} True only when `confidence` is a number in [0, 1],
   *   `type` is one of the seven known types, and `reasoning` is a non-blank
   *   string. Never throws for malformed input.
   */
  validateClassification(result) {
    if (!result) {
      return false;
    }

    // Confidence must be a real number within range. The explicit type/NaN
    // checks matter because `NaN < 0` and `NaN > 1` are both false.
    const { confidence } = result;
    if (
      typeof confidence !== 'number' ||
      Number.isNaN(confidence) ||
      confidence < 0 ||
      confidence > 1
    ) {
      return false;
    }

    // Check if type is valid
    const validTypes = [
      'personality',
      'procedure',
      'preference',
      'fact',
      'thread',
      'task',
      'emotion',
    ];
    if (!validTypes.includes(result.type)) {
      return false;
    }

    // Reasoning must be a non-blank string (non-strings have no `.trim()`)
    if (typeof result.reasoning !== 'string' || result.reasoning.trim() === '') {
      return false;
    }

    return true;
  }

  /**
   * Adjusts scores based on content length (in UTF-16 code units).
   *
   * @param {string} content - Text being classified.
   * @param {Object<string, number>} scores - Score table, mutated in place.
   */
  applyLengthHeuristic(content, scores) {
    const length = content.length;

    // Longer content might be procedures or facts
    if (length > 200) {
      scores.procedure = (scores.procedure || 0) + 0.25;
      scores.fact = (scores.fact || 0) + 0.1;
    }

    // Very short content is likely thread/conversation
    if (length < 50) {
      scores.thread = (scores.thread || 0) + 0.3;
    }

    // Medium length might be preferences
    if (length >= 50 && length <= 150) {
      scores.preference = (scores.preference || 0) + 0.1;
    }
  }

  /**
   * Adjusts scores based on structural markers: numbered lists, bullets,
   * question marks, backticks, URLs and file paths.
   *
   * @param {string} content - Text being classified.
   * @param {Object<string, number>} scores - Score table, mutated in place.
   */
  applyStructureHeuristic(content, scores) {
    // Check for numbered lists (procedures)
    if (/\b\d+\.\s/.test(content)) {
      scores.procedure = (scores.procedure || 0) + 0.3;
    }

    // Check for bullet points
    if (/^\s*[-*•]\s/m.test(content)) {
      scores.procedure = (scores.procedure || 0) + 0.2;
      scores.fact = (scores.fact || 0) + 0.1;
    }

    // Check for questions (thread/conversation)
    if (/\?/.test(content)) {
      scores.thread = (scores.thread || 0) + 0.2;
    }

    // Check for code blocks (procedures/facts); in effect any backtick matches
    if (/```|\`/.test(content)) {
      scores.procedure = (scores.procedure || 0) + 0.3;
      scores.fact = (scores.fact || 0) + 0.2;
    }

    // Check for URLs (facts/procedures)
    if (/https?:\/\//.test(content)) {
      scores.fact = (scores.fact || 0) + 0.2;
      scores.procedure = (scores.procedure || 0) + 0.1;
    }

    // Check for file paths (procedures/facts)
    if (/[\/\\]\w+[\/\\]/.test(content)) {
      scores.procedure = (scores.procedure || 0) + 0.2;
      scores.fact = (scores.fact || 0) + 0.1;
    }
  }

  /**
   * Tests whether `keyword` occurs in `content` as a whole word,
   * case-insensitively. Regex metacharacters in the keyword are escaped.
   *
   * @param {string} content - Text to search.
   * @param {string} keyword - Literal keyword to look for.
   * @returns {boolean} True when the keyword is found between word boundaries.
   */
  hasKeywordMatch(content, keyword) {
    const escaped = keyword.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const regex = new RegExp(`\\b${escaped}\\b`, 'i');
    return regex.test(content);
  }

  /**
   * Applies wording-specific boosts that disambiguate overlapping types
   * (e.g. "should" as preference vs. task, "user" as thread vs. personality).
   *
   * @param {string} content - Text being classified.
   * @param {Object<string, number>} scores - Score table, mutated in place.
   * @param {string[]} [reasoning] - Reasoning entries; when provided it may be
   *   mutated: a 'thread:' entry is replaced by, or a new entry is appended
   *   for, the user-with-personality-traits case.
   */
  applySpecificHeuristics(content, scores, reasoning) {
    // Check for strong preference indicators first
    const preferenceIndicators =
      /\b(prefer|like|dislike|enjoy|avoid|better|worse|best|worst|favorite|rather|instead)\b/i.test(content);
    const evaluativeWords =
      /\b(best|better|worst|worse|preferred|favorite|good|great|excellent|terrible|awful)\s+(approach|style|way|method|solution|choice|option)\b/i.test(content);

    // Strong boost for "best/better X" patterns and preference language
    if (
      evaluativeWords ||
      (preferenceIndicators &&
        /\bfor\s+(our|my|your|their)\s+(use\s+case|needs?|purpose|project)\b/i.test(content))
    ) {
      scores.preference = (scores.preference || 0) + 0.8;
    } else if (preferenceIndicators) {
      scores.preference = (scores.preference || 0) + 0.6;
    }

    // Handle "approach" context-sensitively
    if (/\bapproach\b/i.test(content)) {
      if (evaluativeWords || /\b(best|better|good|great)\s+approach\b/i.test(content)) {
        // Strong preference signal - "best approach", "better approach"
        scores.preference = (scores.preference || 0) + 0.7;
      } else if (preferenceIndicators || /\bprefer\s+.*approach\b/i.test(content)) {
        // Preference about approach
        scores.preference = (scores.preference || 0) + 0.6;
      } else if (/\b(step|process|procedure|method|how)\b/i.test(content)) {
        // Procedural approach
        scores.procedure = (scores.procedure || 0) + 0.4;
      } else {
        // Default to personality only if no other context
        scores.personality = (scores.personality || 0) + 0.3;
      }
    } else {
      // Personality-specific signals (when not about approach preference)
      if (
        /\b(personality|character|behavior|style|manner)\b/i.test(content) &&
        !preferenceIndicators
      ) {
        scores.personality = (scores.personality || 0) + 0.4;
      }
    }

    // Procedure-specific signals
    if (/\b(step|process|how|procedure|method|workflow|instructions|algorithm|first|then|next|finally)\b/i.test(content)) {
      scores.procedure = (scores.procedure || 0) + 0.4;
    }

    // Handle "should" when it's about preference, not task
    if (/\bshould\b/i.test(content)) {
      if (/\b(use|choose|implement|instead\s+of|rather\s+than|better\s+to)\b/i.test(content)) {
        // This is a preference statement
        scores.preference = (scores.preference || 0) + 0.5;
      } else {
        // This is a task statement
        scores.task = (scores.task || 0) + 0.4;
      }
    }

    // Task-specific signals (excluding preference "should")
    if (/\b(task|todo|need\s+to|must|complete|finish|deadline|due)\b/i.test(content)) {
      scores.task = (scores.task || 0) + 0.4;
    }

    // Emotion-specific signals
    if (/\b(feel|emotion|happy|sad|angry|excited|worried|anxious)\b/i.test(content)) {
      scores.emotion = (scores.emotion || 0) + 0.4;
    }

    // Handle love/hate as emotion when expressing feelings about something
    if (
      /\b(love|hate)\s+(the|this|that)\b/i.test(content) ||
      /\busers?\s+(love|hate)\b/i.test(content)
    ) {
      scores.emotion = (scores.emotion || 0) + 0.6;
    }

    // Thread-specific signals
    if (/\b(said|mentioned|discussed|talked|conversation|chat|question|answer|asked|replied)\b/i.test(content)) {
      scores.thread = (scores.thread || 0) + 0.3;
    }

    // Handle "user" more carefully - boost thread only if not about personality/behavior
    if (/\buser\b/i.test(content)) {
      const hasPersonalityContext =
        /\b(behaves?|personality|character|behavior|style|manner|trait|acts?|tends?\s+to|always|usually|calmly|friendly|outgoing|analytical|creative|cheerful|reliable|thoughtful|direct)\b/i.test(content);
      if (!hasPersonalityContext) {
        scores.thread = (scores.thread || 0) + 0.4;
      } else {
        // Strong boost for personality when user is mentioned with personality traits
        scores.personality = (scores.personality || 0) + 0.8;
        // Update reasoning to reflect personality classification
        if (reasoning) {
          const threadIndex = reasoning.findIndex(r => r.startsWith('thread:'));
          if (threadIndex !== -1) {
            reasoning[threadIndex] = `personality: user mentioned with personality traits`;
          } else {
            reasoning.push(`personality: user mentioned with personality traits`);
          }
        }
      }
    }

    // Fact-specific signals - only when no stronger type-specific word is present
    if (/\b(is|are|was|were|has|have|contains|includes)\b/i.test(content)) {
      const hasStrongSignal =
        /\b(personality|character|prefer|like|step|process|how|feel|emotion|said|mentioned|task|todo)\b/i.test(content);
      if (!hasStrongSignal) {
        scores.fact = (scores.fact || 0) + 0.3;
      }
    }

    // Technical/programming facts
    if (/\b(function|returns?|boolean|value|programming|language|server|database|api|library|framework)\b/i.test(content)) {
      scores.fact = (scores.fact || 0) + 0.4;
    }
  }

  /**
   * Estimates a score ceiling used to normalize the best score in `classify`.
   *
   * @param {string} content - Text being classified.
   * @returns {number} 1.0 + 0.1 per whitespace-separated token, plus 0.5 when
   *   length exceeds 100 and another 0.5 when it exceeds 200.
   */
  calculateMaxPossibleScore(content) {
    const length = content.length;
    const wordCount = content.split(/\s+/).length;

    // Base score increases with content richness
    let maxScore = 1.0;

    // More words = more potential for matches
    maxScore += wordCount * 0.1;

    // Longer content = higher potential
    if (length > 100) {
      maxScore += 0.5;
    }
    if (length > 200) {
      maxScore += 0.5;
    }

    return maxScore;
  }
}