@endlessblink/like-i-said-v2
Version:
Task Management & Memory for Claude - Track tasks, remember context, and maintain continuity across sessions with 27 powerful tools. Works with Claude Desktop and Claude Code.
587 lines (508 loc) • 17.3 kB
JavaScript
/**
* Memory Enrichment Pipeline
* Automatically extracts metadata, links, and context to make memories more discoverable
*/
import path from 'node:path';
/**
 * Extracts searchable metadata (code snippets, file paths, URLs, commands,
 * errors, technologies, structure, keywords, entities and cross-references)
 * from a memory's text content and persists the enriched memory.
 */
export class MemoryEnrichment {
  /**
   * @param {object} storage - Memory store. This class calls
   *   `updateMemory(memory)` and `getMemory(id)` on it.
   * @param {object} [vectorStorage] - Optional vector store. Its
   *   `updateMemory(memory)` is called only when its `initialized` flag is truthy.
   */
  constructor(storage, vectorStorage) {
    this.storage = storage;
    this.vectorStorage = vectorStorage;

    // Patterns for extraction. Most carry the `g` flag and are therefore
    // stateful: every use must reset `lastIndex` first.
    this.patterns = {
      // Code-related patterns
      codeBlocks: /```(\w+)?\n([\s\S]*?)```/g,
      functions: /(?:function|const|let|var)\s+(\w+)\s*(?:=\s*)?(?:\([^)]*\)|\w+\s*=>)/g,
      classes: /class\s+(\w+)(?:\s+extends\s+\w+)?/g,
      imports: /(?:import|require)\s*\(?['"]([^'"]+)['"]\)?/g,
      // File and path patterns
      filePaths: /(?:[a-zA-Z]:)?(?:\/|\\)?(?:[\w.-]+(?:\/|\\))*[\w.-]+\.\w+/g,
      urls: /https?:\/\/[^\s<>"{}|\\^`\[\]]+/g,
      // Command patterns
      shellCommands: /^\$\s+(.+)$/gm,
      npmCommands: /npm\s+(?:run\s+)?[\w:-]+/g,
      gitCommands: /git\s+[\w-]+(?:\s+[\w.-]+)*/g,
      // Error patterns
      errorMessages: /(?:Error|Exception|Failed|Cannot|Could not):\s*([^\n]+)/gi,
      stackTraces: /at\s+.+\s+\(.+:\d+:\d+\)/g,
      // Version patterns
      versions: /(?:v|version\s*)?(\d+\.\d+(?:\.\d+)?(?:-[\w.-]+)?)/gi,
      // Configuration patterns
      jsonConfig: /\{[\s\S]*?\}/g,
      yamlConfig: /^[\s-]*\w+:\s*.+$/gm,
      envVars: /\b[A-Z_]+(?:[A-Z0-9_]*)\b(?=\s*=)/g,
      // References
      issueRefs: /#(\d+)/g,
      prRefs: /(?:PR|pr)\s*#?(\d+)/g,
      commitRefs: /\b[0-9a-f]{7,40}\b/g
    };

    // Built once here instead of on every isStopWord() call, which runs
    // once per word of every memory.
    this.stopWords = new Set([
      'the', 'is', 'at', 'which', 'on', 'and', 'a', 'an', 'as', 'are',
      'was', 'were', 'been', 'be', 'have', 'has', 'had', 'do', 'does',
      'did', 'will', 'would', 'should', 'could', 'may', 'might', 'must',
      'can', 'this', 'that', 'these', 'those', 'i', 'you', 'he', 'she',
      'it', 'we', 'they', 'what', 'which', 'who', 'when', 'where', 'why',
      'how', 'all', 'each', 'every', 'some', 'any', 'many', 'much', 'more',
      'most', 'other', 'another', 'such', 'no', 'not', 'only', 'own', 'same',
      'so', 'than', 'too', 'very', 'just', 'but', 'for', 'with', 'about'
    ]);

    // Metadata extractors (accumulators filled by the extract*/detect* methods)
    this.resetExtractors();
  }

  /**
   * Enrich a memory with extracted metadata, persist it, and return it.
   *
   * All per-memory values are derived from this call's own extraction
   * results rather than re-read from `this.extractors` after an `await`:
   * batchEnrich() runs several enrichMemory() calls concurrently and each
   * one replaces `this.extractors`, so reading it after suspension would
   * return another memory's data.
   *
   * @param {object} memory - Memory with a string `content`; `metadata`,
   *   `title`, `summary`, `tags` and `id` are read when present.
   * @returns {Promise<object>} The enriched memory, or the original `memory`
   *   unchanged if anything fails (the error is logged, not rethrown).
   */
  async enrichMemory(memory) {
    try {
      // Reset extractors
      this.resetExtractors();

      const { content } = memory;

      // Extract all metadata. Order matters: extractEntities() reuses the
      // file/URL sets filled by extractFilePaths()/extractUrls(), and all
      // synchronous extraction must finish before the first await.
      const enrichedData = {
        codeSnippets: this.extractCodeSnippets(content),
        referencedFiles: this.extractFilePaths(content),
        externalLinks: this.extractUrls(content),
        commands: this.extractCommands(content),
        errors: this.extractErrors(content),
        technologies: this.detectTechnologies(content),
        structure: this.analyzeStructure(content),
        keywords: this.extractKeywords(content),
        entities: this.extractEntities(content),
        crossReferences: null
      };
      enrichedData.crossReferences = await this.findCrossReferences(memory);

      // Build enhanced metadata
      const enhancedMetadata = {
        ...memory.metadata,
        enriched: true,
        enrichedAt: new Date().toISOString(),
        codeLanguages: this.collectCodeLanguages(enrichedData.codeSnippets),
        fileCount: enrichedData.referencedFiles.length,
        linkCount: enrichedData.externalLinks.length,
        commandCount: enrichedData.commands.length,
        errorCount: enrichedData.errors.length,
        hasCode: enrichedData.codeSnippets.length > 0,
        hasDiagram: content.includes('```mermaid'),
        technologies: [...enrichedData.technologies],
        structure: enrichedData.structure,
        searchableText: this.buildSearchableText(memory, enrichedData)
      };

      // Update memory with enhanced metadata
      const enrichedMemory = {
        ...memory,
        metadata: enhancedMetadata,
        enrichment: enrichedData
      };

      // Update in storage
      await this.storage.updateMemory(enrichedMemory);

      // Update vector embeddings with enriched content
      if (this.vectorStorage && this.vectorStorage.initialized) {
        await this.vectorStorage.updateMemory(enrichedMemory);
      }

      return enrichedMemory;
    } catch (error) {
      console.error('Error enriching memory:', error);
      return memory; // Return original if enrichment fails
    }
  }

  /**
   * Extract fenced code snippets with language detection.
   *
   * @param {string} content
   * @returns {Array<{language: ?string, code: string, startPos: number, endPos: number}>}
   *   One entry per fenced block; `language` is the fence tag, else the
   *   result of detectLanguage(), else null.
   */
  extractCodeSnippets(content) {
    const snippets = [];
    let match;
    this.patterns.codeBlocks.lastIndex = 0;
    while ((match = this.patterns.codeBlocks.exec(content)) !== null) {
      const language = match[1] || this.detectLanguage(match[2]);
      if (language) {
        this.extractors.codeLanguages.add(language);
      }
      snippets.push({
        language,
        code: match[2].trim(),
        startPos: match.index,
        endPos: match.index + match[0].length
      });
    }
    return snippets;
  }

  /**
   * Unique, truthy snippet languages in first-seen order.
   *
   * @param {Array<{language: ?string}>} [snippets]
   * @returns {string[]}
   */
  collectCodeLanguages(snippets) {
    const languages = (snippets || [])
      .map((snippet) => snippet.language)
      .filter(Boolean);
    return [...new Set(languages)];
  }

  /**
   * Detect programming language from code content.
   *
   * @param {string} code
   * @returns {?string} Name of the first indicator (in declaration order)
   *   whose pattern matches, or null when none match.
   */
  detectLanguage(code) {
    const indicators = {
      javascript: /(?:const|let|var|function|=>|require|import\s+.*from)/,
      typescript: /(?:interface|type\s+\w+\s*=|:\s*\w+(?:<|>|\[\])?)/,
      python: /(?:def\s+|class\s+|import\s+|from\s+\w+\s+import|if\s+__name__)/,
      java: /(?:public\s+class|private\s+|protected\s+|@\w+)/,
      cpp: /(?:#include|using\s+namespace|std::|->)/,
      go: /(?:func\s+|package\s+|import\s+\()/,
      rust: /(?:fn\s+|impl\s+|use\s+|pub\s+|let\s+mut)/,
      ruby: /(?:def\s+|class\s+|require\s+|attr_|end$)/m,
      shell: /(?:^\s*#!|^\s*\$\s+|\becho\b|\bexport\b)/m,
      yaml: /^[\s-]*\w+:\s*(?:\||\>|['"]|[\w-]+)/m,
      json: /^\s*\{[\s\S]*\}\s*$/
    };
    for (const [lang, pattern] of Object.entries(indicators)) {
      if (pattern.test(code)) {
        return lang;
      }
    }
    return null;
  }

  /**
   * Extract file paths.
   *
   * @param {string} content
   * @returns {Array<{path: string, name: string, extension: string}>}
   */
  extractFilePaths(content) {
    const files = [];
    let match;
    this.patterns.filePaths.lastIndex = 0;
    while ((match = this.patterns.filePaths.exec(content)) !== null) {
      const filePath = match[0];
      // Filter out common false positives
      if (!filePath.includes('...') && filePath.includes('.')) {
        // The pattern accepts both "/" and "\" separators. path.win32
        // splits on either one regardless of the host OS, whereas the
        // POSIX functions would treat "C:\a\b.js" as a single file name.
        files.push({
          path: filePath,
          name: path.win32.basename(filePath),
          extension: path.win32.extname(filePath)
        });
        this.extractors.referencedFiles.add(filePath);
      }
    }
    return files;
  }

  /**
   * Extract URLs.
   *
   * @param {string} content
   * @returns {Array<{full: string, host: string, path: string, protocol: string}>}
   *   Matches that the URL constructor rejects are skipped.
   */
  extractUrls(content) {
    const urls = [];
    let match;
    this.patterns.urls.lastIndex = 0;
    while ((match = this.patterns.urls.exec(content)) !== null) {
      const url = match[0];
      try {
        const urlObj = new URL(url);
        urls.push({
          full: url,
          host: urlObj.hostname,
          path: urlObj.pathname,
          protocol: urlObj.protocol
        });
        this.extractors.externalLinks.add(url);
      } catch (e) {
        // Invalid URL, skip (deliberate best-effort)
      }
    }
    return urls;
  }

  /**
   * Extract commands: `$ `-prefixed shell lines, then npm, then git invocations.
   *
   * @param {string} content
   * @returns {Array<{type: string, command: string, full?: string}>}
   *   `full` is present on shell entries only.
   */
  extractCommands(content) {
    const commands = [];
    let match;

    // Shell commands
    this.patterns.shellCommands.lastIndex = 0;
    while ((match = this.patterns.shellCommands.exec(content)) !== null) {
      commands.push({
        type: 'shell',
        command: match[1],
        full: match[0]
      });
      this.extractors.commands.add(match[1]);
    }

    // NPM commands
    this.patterns.npmCommands.lastIndex = 0;
    while ((match = this.patterns.npmCommands.exec(content)) !== null) {
      commands.push({
        type: 'npm',
        command: match[0]
      });
      this.extractors.commands.add(match[0]);
    }

    // Git commands
    this.patterns.gitCommands.lastIndex = 0;
    while ((match = this.patterns.gitCommands.exec(content)) !== null) {
      commands.push({
        type: 'git',
        command: match[0]
      });
      this.extractors.commands.add(match[0]);
    }
    return commands;
  }

  /**
   * Extract error information.
   *
   * @param {string} content
   * @returns {Array<{type: string, message: string, full?: string}>}
   *   One entry per "<Kind>: message" match, plus a single `stack_trace`
   *   entry when the content contains at least one stack frame.
   */
  extractErrors(content) {
    const errors = [];
    let match;
    this.patterns.errorMessages.lastIndex = 0;
    while ((match = this.patterns.errorMessages.exec(content)) !== null) {
      const type = match[0].split(':')[0];
      errors.push({
        type,
        message: match[1].trim(),
        full: match[0]
      });
      this.extractors.errorTypes.add(type.toLowerCase());
    }

    // Check for stack traces. The pattern is global, so test() resumes from
    // lastIndex; without this reset a previous successful call would make
    // the next one start mid-string and miss the trace.
    this.patterns.stackTraces.lastIndex = 0;
    const hasStackTrace = this.patterns.stackTraces.test(content);
    this.patterns.stackTraces.lastIndex = 0;
    if (hasStackTrace) {
      errors.push({
        type: 'stack_trace',
        message: 'Contains stack trace information'
      });
    }
    return errors;
  }

  /**
   * Detect technologies mentioned.
   *
   * @param {string} content
   * @returns {string[]} Detected technology keys, in table order.
   */
  detectTechnologies(content) {
    const technologies = new Map([
      // Languages
      ['javascript', /\b(?:javascript|js|node\.?js|nodejs)\b/i],
      ['typescript', /\b(?:typescript|ts)\b/i],
      ['python', /\bpython\b/i],
      ['java', /\bjava\b(?!script)/i],
      ['go', /\b(?:golang|go)\b/i],
      ['rust', /\brust\b/i],
      ['ruby', /\bruby\b/i],
      ['php', /\bphp\b/i],
      // Frameworks
      ['react', /\breact(?:\.?js)?\b/i],
      ['vue', /\bvue(?:\.?js)?\b/i],
      ['angular', /\bangular\b/i],
      ['express', /\bexpress(?:\.?js)?\b/i],
      ['django', /\bdjango\b/i],
      ['flask', /\bflask\b/i],
      ['rails', /\b(?:ruby on )?rails\b/i],
      // Tools
      ['docker', /\bdocker\b/i],
      ['kubernetes', /\b(?:kubernetes|k8s)\b/i],
      ['git', /\bgit\b/i],
      ['npm', /\bnpm\b/i],
      ['webpack', /\bwebpack\b/i],
      ['vite', /\bvite\b/i],
      // Databases
      ['mongodb', /\bmongodb?\b/i],
      ['postgresql', /\b(?:postgresql|postgres)\b/i],
      ['mysql', /\bmysql\b/i],
      ['redis', /\bredis\b/i],
      ['sqlite', /\bsqlite\b/i],
      // Cloud/Services
      ['aws', /\b(?:aws|amazon web services)\b/i],
      ['azure', /\bazure\b/i],
      ['gcp', /\b(?:gcp|google cloud)\b/i],
      ['firebase', /\bfirebase\b/i],
      // MCP specific
      ['mcp', /\b(?:mcp|model context protocol)\b/i],
      ['claude', /\bclaude\b/i],
      ['dxt', /\b(?:dxt|desktop extension)\b/i]
    ]);
    const detected = [];
    for (const [tech, pattern] of technologies) {
      if (pattern.test(content)) {
        detected.push(tech);
        this.extractors.technologies.add(tech);
      }
    }
    return detected;
  }

  /**
   * Analyze content structure (markdown-style headings, lists, fenced code
   * blocks, paragraphs) and derive a coarse complexity label.
   *
   * @param {string} content
   * @returns {{totalLines: number, headings: number, lists: number,
   *   codeBlocks: number, paragraphs: number, complexity: string}}
   */
  analyzeStructure(content) {
    const lines = content.split('\n');
    const structure = {
      totalLines: lines.length,
      headings: 0,
      lists: 0,
      codeBlocks: 0,
      paragraphs: 0,
      complexity: 'simple'
    };
    let inCodeBlock = false;
    let inList = false;
    lines.forEach(line => {
      if (line.match(/^```/)) {
        inCodeBlock = !inCodeBlock;
        // Count a block when its closing fence is seen.
        if (!inCodeBlock) structure.codeBlocks++;
      } else if (!inCodeBlock) {
        if (line.match(/^#+\s+/)) structure.headings++;
        else if (line.match(/^[\s]*[-*+]\s+/) || line.match(/^[\s]*\d+\.\s+/)) {
          // Consecutive list items count as one list; a blank line ends it.
          if (!inList) structure.lists++;
          inList = true;
        } else if (line.trim() === '') {
          inList = false;
        } else if (line.trim().length > 20) {
          structure.paragraphs++;
        }
      }
    });

    // Determine complexity
    const score = structure.headings + structure.lists + structure.codeBlocks;
    if (score > 10) structure.complexity = 'complex';
    else if (score > 5) structure.complexity = 'moderate';
    return structure;
  }

  /**
   * Extract the ten most frequent non-stop-words longer than three
   * characters, ignoring fenced code blocks.
   *
   * @param {string} content
   * @returns {string[]} Keywords, most frequent first; ties keep first-seen order.
   */
  extractKeywords(content) {
    // Remove code blocks and special characters
    const cleanContent = content
      .replace(/```[\s\S]*?```/g, '')
      .replace(/[^\w\s-]/g, ' ')
      .toLowerCase();

    // Get word frequency. A Map is used rather than a plain object so that
    // words such as "constructor" do not collide with Object.prototype
    // members and corrupt their own count.
    const frequency = new Map();
    for (const word of cleanContent.split(/\s+/)) {
      if (word.length > 3 && !this.isStopWord(word)) {
        frequency.set(word, (frequency.get(word) || 0) + 1);
      }
    }

    // Get top keywords
    return [...frequency.entries()]
      .sort(([, a], [, b]) => b - a)
      .slice(0, 10)
      .map(([word]) => word);
  }

  /**
   * Extract named entities.
   *
   * `files` and `urls` are copied from the accumulators filled by
   * extractFilePaths() and extractUrls(), so those must have run since the
   * last resetExtractors() for these two lists to be populated.
   *
   * @param {string} content
   * @returns {{functions: string[], classes: string[], files: string[],
   *   urls: string[], versions: string[]}}
   */
  extractEntities(content) {
    const entities = {
      functions: [],
      classes: [],
      files: [],
      urls: [],
      versions: []
    };
    let match;

    // Extract function names
    this.patterns.functions.lastIndex = 0;
    while ((match = this.patterns.functions.exec(content)) !== null) {
      if (match[1]) entities.functions.push(match[1]);
    }

    // Extract class names
    this.patterns.classes.lastIndex = 0;
    while ((match = this.patterns.classes.exec(content)) !== null) {
      if (match[1]) entities.classes.push(match[1]);
    }

    // Extract versions
    this.patterns.versions.lastIndex = 0;
    while ((match = this.patterns.versions.exec(content)) !== null) {
      entities.versions.push(match[1]);
    }

    // Use already extracted files and URLs
    entities.files = Array.from(this.extractors.referencedFiles);
    entities.urls = Array.from(this.extractors.externalLinks);
    return entities;
  }

  /**
   * Find cross-references to other memories, issues and PRs.
   *
   * @param {object} memory - Reads `memory.content` and `memory.id`.
   * @returns {Promise<Array<object>>} `explicit` entries ({id, type, title})
   *   for 20+ character lowercase alphanumeric tokens that
   *   `storage.getMemory` resolves to a different memory, followed by
   *   `issue` and `pr` entries ({type, number, full}).
   */
  async findCrossReferences(memory) {
    const references = [];

    // Look for explicit memory ID references. The regex is local because
    // this loop awaits between exec() calls; a shared global pattern could
    // have its lastIndex moved by a concurrent call.
    const idPattern = /\b([a-z0-9]{20,})\b/g;
    let match;
    while ((match = idPattern.exec(memory.content)) !== null) {
      try {
        const referencedMemory = await this.storage.getMemory(match[1]);
        if (referencedMemory && referencedMemory.id !== memory.id) {
          references.push({
            id: match[1],
            type: 'explicit',
            title: referencedMemory.title || 'Untitled'
          });
        }
      } catch (e) {
        // Not a valid memory ID (lookup failures are expected for
        // arbitrary long tokens, so they are ignored on purpose)
      }
    }

    // Look for issue/PR references
    this.patterns.issueRefs.lastIndex = 0;
    while ((match = this.patterns.issueRefs.exec(memory.content)) !== null) {
      references.push({
        type: 'issue',
        number: match[1],
        full: match[0]
      });
    }
    this.patterns.prRefs.lastIndex = 0;
    while ((match = this.patterns.prRefs.exec(memory.content)) !== null) {
      references.push({
        type: 'pr',
        number: match[1],
        full: match[0]
      });
    }
    return references;
  }

  /**
   * Build enhanced searchable text: the memory's own text plus extracted
   * keywords, technologies, code languages, entity names, file names and
   * commands, space-joined and lower-cased.
   *
   * Code languages are taken from `enrichedData.codeSnippets` rather than
   * from the shared accumulator, so the result depends only on the arguments.
   *
   * @param {object} memory
   * @param {object} enrichedData - Result object built by enrichMemory().
   * @returns {string}
   */
  buildSearchableText(memory, enrichedData) {
    const parts = [
      memory.content,
      memory.title || '',
      memory.summary || '',
      ...(memory.tags || []),
      ...enrichedData.keywords,
      ...enrichedData.technologies,
      ...this.collectCodeLanguages(enrichedData.codeSnippets),
      ...enrichedData.entities.functions,
      ...enrichedData.entities.classes,
      ...enrichedData.referencedFiles.map(f => f.name),
      ...enrichedData.commands.map(c => c.command || c.full)
    ];
    return parts.filter(Boolean).join(' ').toLowerCase();
  }

  /**
   * Helper methods
   */

  /**
   * Replace the accumulator object with fresh, empty collections.
   */
  resetExtractors() {
    this.extractors = {
      codeLanguages: new Set(),
      referencedFiles: new Set(),
      externalLinks: new Set(),
      commands: new Set(),
      errorTypes: new Set(),
      technologies: new Set(),
      versions: new Map()
    };
  }

  /**
   * @param {string} word - Expected to be lower-case already.
   * @returns {boolean} True when `word` is in the stop-word list.
   */
  isStopWord(word) {
    return this.stopWords.has(word);
  }

  /**
   * Batch enrich multiple memories, `parallel` at a time.
   *
   * @param {object[]} memories
   * @param {{parallel?: number, onProgress?: function}} [options]
   *   `parallel` is the batch size (default 5); values that are not a
   *   number >= 1 fall back to 1 so the loop always advances.
   *   `onProgress` receives `{current, total, percentage}` after each batch.
   * @returns {Promise<object[]>} Results of enrichMemory(), in input order.
   */
  async batchEnrich(memories, options = {}) {
    const { parallel = 5, onProgress } = options;
    // A step of 0, a negative step or NaN would never advance `i`.
    const batchSize = Math.max(1, Math.floor(Number(parallel)) || 1);
    const enriched = [];

    // Process in batches
    for (let i = 0; i < memories.length; i += batchSize) {
      const batch = memories.slice(i, i + batchSize);
      const enrichedBatch = await Promise.all(
        batch.map(memory => this.enrichMemory(memory))
      );
      enriched.push(...enrichedBatch);
      if (onProgress) {
        const current = Math.min(i + batchSize, memories.length);
        onProgress({
          current,
          total: memories.length,
          percentage: Math.round((current / memories.length) * 100)
        });
      }
    }
    return enriched;
  }
}
export default MemoryEnrichment;