UNPKG

claude-flow

Version:

Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration

1,022 lines 63.5 kB
/**
 * Memory MCP Tools for CLI - V3 with sql.js/HNSW Backend
 *
 * UPGRADED: Now uses the advanced sql.js + HNSW backend for:
 * - 150x-12,500x faster semantic search
 * - Vector embeddings with cosine similarity
 * - Persistent SQLite storage (WASM)
 * - Backward compatible with legacy JSON storage (auto-migrates)
 *
 * @module v3/cli/mcp-tools/memory-tools
 */
import { existsSync, mkdirSync, readdirSync, readFileSync, unlinkSync, writeFileSync } from 'fs';
import { homedir } from 'os';
import { join, resolve } from 'path';
import { createHash } from 'crypto';
import { validateIdentifier } from './validate-input.js';

// #1604: Align with memory-initializer.ts — single source of truth is .swarm/memory.db
const MEMORY_DIR = '.swarm';
const LEGACY_MEMORY_FILE = 'store.json';
const LEGACY_MEMORY_DIR = '.claude-flow/memory';
const MIGRATION_MARKER = '.migrated-to-sqlite';

/**
 * Absolute path of the current memory directory (`.swarm`, resolved against cwd).
 * @returns {string}
 */
function getMemoryDir() {
    return resolve(MEMORY_DIR);
}

/**
 * Absolute path of the legacy JSON store.
 *
 * Resolved against LEGACY_MEMORY_DIR so it names the same file that
 * hasLegacyStore()/loadLegacyStore() read. It previously pointed into
 * MEMORY_DIR, where no legacy store is ever looked up.
 * @returns {string}
 */
function getLegacyPath() {
    return resolve(join(LEGACY_MEMORY_DIR, LEGACY_MEMORY_FILE));
}

/**
 * Absolute path of the "migration done" marker file.
 *
 * Resolved against LEGACY_MEMORY_DIR so it names the same file that
 * markMigrationComplete() writes and hasLegacyStore() checks. It previously
 * pointed into MEMORY_DIR, so `memory_migrate` with `force: true` removed a
 * file that was never created and the real marker kept blocking re-migration.
 * @returns {string}
 */
function getMigrationMarkerPath() {
    return resolve(join(LEGACY_MEMORY_DIR, MIGRATION_MARKER));
}

/**
 * Create the memory directory (and any missing parents) if it does not exist.
 * @returns {void}
 */
function ensureMemoryDir() {
    const dir = getMemoryDir();
    if (!existsSync(dir)) {
        mkdirSync(dir, { recursive: true });
    }
}

// D-2: Input bounds for memory parameters
const MAX_KEY_LENGTH = 1024;
const MAX_VALUE_SIZE = 1024 * 1024; // 1MB
const MAX_QUERY_LENGTH = 4096;

// #1425 — single source of truth for the dangerous-character set rejected by
// validateMemoryInput. Imported by sanitizeMemoryKey so write-side sanitization
// and read-side rejection can never drift apart (the symmetry bug behind #1884).
const DANGEROUS_KEY_CHARS = /[;&|`$(){}[\]<>!#\\\0]|\.\.[/\\]/g; const DANGEROUS_KEY_PATTERN = /[;&|`$(){}[\]<>!#\\\0]|\.\.[/\\]/; function validateMemoryInput(key, value, query, namespace) { if (key && key.length > MAX_KEY_LENGTH) { throw new Error(`Key exceeds maximum length of ${MAX_KEY_LENGTH} characters`); } if (value && value.length > MAX_VALUE_SIZE) { throw new Error(`Value exceeds maximum size of ${MAX_VALUE_SIZE} bytes`); } if (query && query.length > MAX_QUERY_LENGTH) { throw new Error(`Query exceeds maximum length of ${MAX_QUERY_LENGTH} characters`); } // Reject path traversal and shell metacharacters in keys/namespaces (#1425) if (key && DANGEROUS_KEY_PATTERN.test(key)) { throw new Error('Key contains disallowed characters'); } if (namespace && DANGEROUS_KEY_PATTERN.test(namespace)) { throw new Error('Namespace contains disallowed characters'); } } // #1884 — sanitize a key produced from arbitrary input (markdown headings, // frontmatter names, file names) so it survives validateMemoryInput on the // read/delete path. Replaces every dangerous char with `_`. Truncates to // MAX_KEY_LENGTH so the bound check in validateMemoryInput also passes. // Keep this in sync with DANGEROUS_KEY_PATTERN — they share DANGEROUS_KEY_CHARS. function sanitizeMemoryKey(key) { const safe = key.replace(DANGEROUS_KEY_CHARS, '_'); return safe.length > MAX_KEY_LENGTH ? safe.slice(0, MAX_KEY_LENGTH) : safe; } // #1937 — minimal glob → RegExp helper for memory_import_claude exclusion // patterns. Anchored. Supports the three operators the issue's voice-fidelity // workflow needs: // `**` — any chars including path separators // `*` — any chars except path separators // `?` — exactly one char except a path separator // Everything else is regex-escaped. Used to match absolute file paths. function globToRegex(pattern) { // Tokenize so we can replace `**` before `*` without overlap. 
let out = ''; for (let i = 0; i < pattern.length; i++) { const c = pattern[i]; if (c === '*' && pattern[i + 1] === '*') { out += '.*'; i++; } else if (c === '*') { out += '[^/\\\\]*'; } else if (c === '?') { out += '[^/\\\\]'; } else if (/[.+^$|(){}\[\]\\]/.test(c)) { out += '\\' + c; } else { out += c; } } return new RegExp('^' + out + '$'); } // #1883 — resolve the Claude-Code project memory directory for the *current* // project. Claude Code hashes the project path differently per host OS, and // our previous logic only POSIX-slash-replaced cwd, which breaks for: // - WSL bridges where cwd is `/mnt/<drive>/...` but Claude Code is on Windows // - paths containing spaces (Claude Code replaces spaces with dashes) // - any leading slash on POSIX (Claude Code strips it) // Strategy: try several candidate hashes and return the first one with a // memory dir that exists. An explicit `projectPathOverride` short-circuits // the heuristics for callers that know the canonical project path. function resolveProjectMemoryDir(claudeProjectsDir, projectPathOverride) { const candidates = new Set(); const sources = []; if (projectPathOverride && projectPathOverride.length > 0) { sources.push(projectPathOverride); } else { sources.push(process.cwd()); } for (const source of sources) { // Candidate 1: legacy POSIX hash — what shipped before #1883 candidates.add(source.replace(/\//g, '-')); // Candidate 2: WSL `/mnt/<drive>/...` translated to Claude-Code Windows hash // e.g. `/mnt/c/Users/x/Project Name` → `C--Users-x-Project-Name` const wsl = source.match(/^\/mnt\/([a-z])(\/.*)?$/i); if (wsl) { const drive = wsl[1].toUpperCase(); const rest = (wsl[2] ?? 
'').replace(/\//g, '-').replace(/ /g, '-'); candidates.add(`${drive}-${rest}`); } // Candidate 3: POSIX hash with leading dash stripped (Claude Code on macOS/Linux) const stripped = source.replace(/\//g, '-').replace(/^-+/, ''); candidates.add(stripped); // Candidate 4: spaces replaced with dashes (Claude Code's space rule) candidates.add(source.replace(/\//g, '-').replace(/ /g, '-')); // Candidate 5 (#1939): native Win32 path on a Win32 Claude Code install. // `C:\Users\tobia\OneDrive\Desktop\Claude Stuff` → // `C--Users-tobia-OneDrive-Desktop-Claude-Stuff`. Claude Code's on-disk // slug replaces drive-colon AND backslashes AND whitespace with `-`. // The earlier candidates only handled forward slashes, so a Win32+Win32 // setup never matched. if (/^[A-Za-z]:[\\/]/.test(source)) { candidates.add(source.replace(/[:\\/]/g, '-').replace(/\s+/g, '-')); } } for (const projectHash of candidates) { const memDir = join(claudeProjectsDir, projectHash, 'memory'); if (existsSync(memDir)) return { memDir, projectHash }; } return null; } /** * Check if legacy JSON store exists in old .claude-flow/memory/ location */ function hasLegacyStore() { const legacyPath = resolve(join(LEGACY_MEMORY_DIR, LEGACY_MEMORY_FILE)); const migrationMarker = resolve(join(LEGACY_MEMORY_DIR, MIGRATION_MARKER)); return existsSync(legacyPath) && !existsSync(migrationMarker); } /** * Load legacy JSON store for migration */ function loadLegacyStore() { try { const legacyPath = resolve(join(LEGACY_MEMORY_DIR, LEGACY_MEMORY_FILE)); if (existsSync(legacyPath)) { const data = readFileSync(legacyPath, 'utf-8'); return JSON.parse(data); } } catch { // Return null on error } return null; } /** * Mark migration as complete */ function markMigrationComplete() { const legacyDir = resolve(LEGACY_MEMORY_DIR); if (!existsSync(legacyDir)) mkdirSync(legacyDir, { recursive: true }); writeFileSync(resolve(join(LEGACY_MEMORY_DIR, MIGRATION_MARKER)), JSON.stringify({ migratedAt: new Date().toISOString(), version: 
'3.0.0', }), 'utf-8'); } /** * Lazy-load memory initializer functions to avoid circular deps */ async function getMemoryFunctions() { const { storeEntry, searchEntries, listEntries, getEntry, deleteEntry, initializeMemoryDatabase, checkMemoryInitialization, } = await import('../memory/memory-initializer.js'); return { storeEntry, searchEntries, listEntries, getEntry, deleteEntry, initializeMemoryDatabase, checkMemoryInitialization, }; } /** * Ensure memory database is initialized and migrate legacy data if needed. * #1606: Wrapped in try/catch to prevent process-level crashes that kill * the stdio MCP transport on Windows/Codex. */ async function ensureInitialized() { try { const { initializeMemoryDatabase, checkMemoryInitialization, storeEntry } = await getMemoryFunctions(); // Check if already initialized const status = await checkMemoryInitialization(); if (!status.initialized) { await initializeMemoryDatabase({ force: false, verbose: false }); } // Migrate legacy JSON data if exists (from old .claude-flow/memory/ location) if (hasLegacyStore()) { const legacyStore = loadLegacyStore(); if (legacyStore && Object.keys(legacyStore.entries).length > 0) { console.error('[MCP Memory] Migrating legacy JSON store to sql.js...'); let migrated = 0; for (const [key, entry] of Object.entries(legacyStore.entries)) { try { const value = typeof entry.value === 'string' ? entry.value : JSON.stringify(entry.value); await storeEntry({ key, value, namespace: 'default', generateEmbeddingFlag: true, }); migrated++; } catch (e) { console.error(`[MCP Memory] Failed to migrate key "${key}":`, e); } } console.error(`[MCP Memory] Migrated ${migrated}/${Object.keys(legacyStore.entries).length} entries`); markMigrationComplete(); } } } catch (error) { console.error('[MCP Memory] Initialization failed:', error instanceof Error ? 
error.message : error); } } export const memoryTools = [ { name: 'memory_store', description: 'Persistent key-value store with vector embedding — survives across sessions and is searchable by meaning, not just by file path. Use when native Write is wrong because the data is not a file (e.g. a learned pattern, a decision, a budget config) AND you need to recall it later by semantic query, not by path. Defaults to namespace="default"; pass --upsert=true to update an existing key.', category: 'memory', inputSchema: { type: 'object', properties: { key: { type: 'string', description: 'Memory key (unique within namespace)' }, value: { description: 'Value to store (string or object)' }, namespace: { type: 'string', description: 'Namespace for organization (default: "default")' }, tags: { type: 'array', items: { type: 'string' }, description: 'Optional tags for filtering', }, ttl: { type: 'number', description: 'Time-to-live in seconds (optional)' }, upsert: { type: 'boolean', description: 'If true, update existing key instead of failing (default: false)' }, }, required: ['key', 'value'], }, handler: async (input) => { await ensureInitialized(); const { storeEntry } = await getMemoryFunctions(); const key = input.key; const namespace = input.namespace || 'default'; const rawValue = input.value; const value = typeof rawValue === 'string' ? rawValue : (rawValue !== undefined ? 
JSON.stringify(rawValue) : ''); const tags = input.tags || []; const ttl = input.ttl; const upsert = input.upsert || false; if (!value) { return { success: false, key, stored: false, hasEmbedding: false, error: 'Value is required and cannot be empty', }; } validateMemoryInput(key, value, undefined, namespace); const startTime = performance.now(); try { const result = await storeEntry({ key, value, namespace, generateEmbeddingFlag: true, tags, ttl, upsert, }); const duration = performance.now() - startTime; return { success: result.success, key, namespace, stored: result.success, storedAt: new Date().toISOString(), hasEmbedding: !!result.embedding, embeddingDimensions: result.embedding?.dimensions || null, backend: 'sql.js + HNSW', storeTime: `${duration.toFixed(2)}ms`, error: result.error, }; } catch (error) { return { success: false, key, error: error instanceof Error ? error.message : 'Unknown error', }; } }, }, { name: 'memory_retrieve', description: 'Read back a value previously stored via memory_store, by exact (namespace, key) — lossless, includes metadata. Use when native Read is wrong because the value is not a file (it lives in the .swarm/memory.db SQLite store) AND you know the exact key. 
For semantic lookup by meaning, use memory_search.', category: 'memory', inputSchema: { type: 'object', properties: { key: { type: 'string', description: 'Memory key' }, namespace: { type: 'string', description: 'Namespace (default: "default")' }, }, required: ['key'], }, handler: async (input) => { await ensureInitialized(); const { getEntry } = await getMemoryFunctions(); const key = input.key; const namespace = input.namespace || 'default'; validateMemoryInput(key, undefined, undefined, namespace); try { const result = await getEntry({ key, namespace }); if (result.found && result.entry) { // Try to parse JSON value let value = result.entry.content; try { value = JSON.parse(result.entry.content); } catch { // Keep as string } return { key, namespace, value, tags: result.entry.tags, storedAt: result.entry.createdAt, updatedAt: result.entry.updatedAt, accessCount: result.entry.accessCount, hasEmbedding: result.entry.hasEmbedding, found: true, backend: 'sql.js + HNSW', }; } return { key, namespace, value: null, found: false, }; } catch (error) { return { key, namespace, value: null, found: false, error: error instanceof Error ? error.message : 'Unknown error', }; } }, }, { name: 'memory_search', description: 'Find stored memories by meaning (vector similarity), not by literal text — finds "JWT auth pattern" when you query "token-based login flow". Use when native Grep is wrong because Grep matches characters and you need to find conceptually-related entries across past sessions. Backed by HNSW index over ONNX embeddings; returns top-k with similarity scores. 
Pair with smart=true for query expansion + MMR diversity.', category: 'memory', inputSchema: { type: 'object', properties: { query: { type: 'string', description: 'Search query (semantic similarity)' }, namespace: { type: 'string', description: 'Namespace to search (default: "default")' }, limit: { type: 'number', description: 'Maximum results (default: 10)' }, threshold: { type: 'number', description: 'Minimum similarity threshold 0-1 (default: 0.3)' }, smart: { type: 'boolean', description: 'Enable SmartRetrieval pipeline — query expansion, RRF fusion, recency boost, MMR diversity (default: false)' }, }, required: ['query'], }, handler: async (input) => { await ensureInitialized(); const { searchEntries } = await getMemoryFunctions(); const query = input.query; const namespace = input.namespace || 'default'; const limit = input.limit ?? 10; const threshold = input.threshold ?? 0.3; validateMemoryInput(undefined, undefined, query); const startTime = performance.now(); try { // #1846: feature-detect smartSearch on the resolved memory package. // The export landed in @claude-flow/memory@>3.0.0-alpha.14 — older // installs pin to a build that exposes search/store/retrieve but // not smartSearch. Throwing `is not a function` is hostile; instead // detect at runtime and gracefully fall through to plain semantic // search with an explicit fallback note. let smartFallbackReason; if (input.smart) { // eslint-disable-next-line @typescript-eslint/no-explicit-any let memMod; try { memMod = await import('@claude-flow/memory'); } catch (err) { smartFallbackReason = `@claude-flow/memory failed to load: ${err.message}`; } const smartSearch = memMod && typeof memMod.smartSearch === 'function' ? memMod.smartSearch : undefined; if (smartSearch) { // SmartRetrieval pipeline (ADR-090) const rawSearch = async (req) => { const r = await searchEntries({ query: req.query, namespace: req.namespace || namespace, limit: req.limit || limit * 3, threshold: req.threshold ?? 
threshold, }); return { results: r.results.map(e => ({ id: e.id, key: e.key, content: e.content, score: e.score, namespace: e.namespace, })), }; }; const smartResult = await smartSearch(rawSearch, { query, namespace, limit, threshold, }); const duration = performance.now() - startTime; const results = smartResult.results.map((r) => { let value = r.content; try { value = JSON.parse(r.content); } catch { /* keep as string */ } return { key: r.key, namespace: r.namespace, value, similarity: r.score, }; }); return { query, results, total: results.length, searchTime: `${duration.toFixed(2)}ms`, backend: 'SmartRetrieval (RRF + MMR + Recency)', stats: smartResult.stats, }; } // smart=true but smartSearch unavailable on installed package. // Fall through to plain search with an explicit warning. smartFallbackReason = smartFallbackReason ?? 'smartSearch is not exported by the installed @claude-flow/memory build (likely a release lag — see #1846). Falling back to standard semantic search.'; } // Original non-smart path (unchanged) — also reached when smart was // requested but unavailable. We attach `smartFallback` to the // response so callers can see the degradation explicitly. const result = await searchEntries({ query, namespace, limit, threshold, }); const duration = performance.now() - startTime; // Parse JSON values in results const results = result.results.map(r => { let value = r.content; try { value = JSON.parse(r.content); } catch { // Keep as string } return { key: r.key, namespace: r.namespace, value, similarity: r.score, }; }); return { query, results, total: results.length, searchTime: `${duration.toFixed(2)}ms`, backend: 'HNSW + sql.js', ...(smartFallbackReason ? { smartFallback: smartFallbackReason } : {}), }; } catch (error) { return { query, results: [], total: 0, error: error instanceof Error ? error.message : 'Unknown error', }; } }, }, { name: 'memory_delete', description: 'Remove a stored memory entry by exact (namespace, key). 
Use when a previously stored decision is invalidated or contains stale data. No native equivalent — Write to a file does not affect the .swarm/memory.db SQLite store.', category: 'memory', inputSchema: { type: 'object', properties: { key: { type: 'string', description: 'Memory key' }, namespace: { type: 'string', description: 'Namespace (default: "default")' }, }, required: ['key'], }, handler: async (input) => { await ensureInitialized(); const { deleteEntry } = await getMemoryFunctions(); const key = input.key; const namespace = input.namespace || 'default'; validateMemoryInput(key, undefined, undefined, namespace); try { const result = await deleteEntry({ key, namespace }); return { success: result.deleted, key, namespace, deleted: result.deleted, hnswIndexInvalidated: result.deleted, backend: 'sql.js + HNSW', }; } catch (error) { return { success: false, key, namespace, deleted: false, error: error instanceof Error ? error.message : 'Unknown error', }; } }, }, { name: 'memory_list', description: 'Enumerate stored memory entries (optionally filtered by namespace/tags) without semantic search. Use when native Glob is wrong because the entries are not files (they live in .swarm/memory.db). 
For inspection / audit / "what is in my memory" — pair with memory_search for retrieval-by-meaning.', category: 'memory', inputSchema: { type: 'object', properties: { namespace: { type: 'string', description: 'Filter by namespace' }, limit: { type: 'number', description: 'Maximum results (default: 50)' }, offset: { type: 'number', description: 'Offset for pagination (default: 0)' }, }, }, handler: async (input) => { await ensureInitialized(); const { listEntries } = await getMemoryFunctions(); const namespace = input.namespace; const limit = input.limit || 50; const offset = input.offset || 0; if (namespace) { const vNs = validateIdentifier(namespace, 'namespace'); if (!vNs.valid) throw new Error(vNs.error); } try { const result = await listEntries({ namespace, limit, offset, }); const entries = result.entries.map(e => ({ key: e.key, namespace: e.namespace, storedAt: e.createdAt, updatedAt: e.updatedAt, accessCount: e.accessCount, hasEmbedding: e.hasEmbedding, size: e.size, })); return { entries, total: result.total, limit, offset, backend: 'sql.js + HNSW', }; } catch (error) { return { entries: [], total: 0, limit, offset, error: error instanceof Error ? error.message : 'Unknown error', }; } }, }, { name: 'memory_stats', description: 'Get memory storage statistics including HNSW index status Use when native Read/Write is wrong because you need (a) cross-session retrieval by semantic similarity (vector embeddings) not by file path, (b) namespacing across projects without managing directory layout, or (c) the .swarm/memory.db audit trail. 
For one-shot file I/O, native Read/Write is fine.', category: 'memory', inputSchema: { type: 'object', properties: {}, }, handler: async () => { await ensureInitialized(); const { checkMemoryInitialization, listEntries } = await getMemoryFunctions(); try { const status = await checkMemoryInitialization(); const allEntries = await listEntries({ limit: 100000 }); // Count by namespace const namespaces = {}; let withEmbeddings = 0; for (const entry of allEntries.entries) { namespaces[entry.namespace] = (namespaces[entry.namespace] || 0) + 1; if (entry.hasEmbedding) withEmbeddings++; } return { initialized: status.initialized, totalEntries: allEntries.total, entriesWithEmbeddings: withEmbeddings, embeddingCoverage: allEntries.total > 0 ? `${((withEmbeddings / allEntries.total) * 100).toFixed(1)}%` : '0%', namespaces, backend: 'sql.js + HNSW', version: status.version || '3.0.0', features: status.features || { vectorEmbeddings: true, hnswIndex: true, semanticSearch: true, }, }; } catch (error) { return { initialized: false, error: error instanceof Error ? error.message : 'Unknown error', }; } }, }, { name: 'memory_migrate', description: 'Manually trigger migration from legacy JSON store to sql.js Use when native Read/Write is wrong because you need (a) cross-session retrieval by semantic similarity (vector embeddings) not by file path, (b) namespacing across projects without managing directory layout, or (c) the .swarm/memory.db audit trail. 
For one-shot file I/O, native Read/Write is fine.', category: 'memory', inputSchema: { type: 'object', properties: { force: { type: 'boolean', description: 'Force re-migration even if already done' }, }, }, handler: async (input) => { const force = input.force; // Remove migration marker if forcing if (force) { const markerPath = getMigrationMarkerPath(); if (existsSync(markerPath)) { unlinkSync(markerPath); } } // Check for legacy data const legacyStore = loadLegacyStore(); if (!legacyStore || Object.keys(legacyStore.entries).length === 0) { return { success: true, message: 'No legacy data to migrate', migrated: 0, }; } // Run migration via ensureInitialized await ensureInitialized(); return { success: true, message: 'Migration completed', migrated: Object.keys(legacyStore.entries).length, backend: 'sql.js + HNSW', }; }, }, // ===== Claude Code Memory Bridge Tools ===== { name: 'memory_import_claude', description: 'Import Claude Code auto-memory files into AgentDB with ONNX vector embeddings. Reads ~/.claude/projects/*/memory/*.md files, parses YAML frontmatter, splits into sections, and stores with 384-dim embeddings for semantic search. Use allProjects=true to import from ALL Claude projects. Pass projectPath to override cwd-based detection (#1883 — required when Ruflo runs in WSL but Claude Code is on Windows). Pass excludeFilePatterns (glob list) or excludeFiles (absolute path list) to skip voice-load-bearing, PII, or persona-restricted files (#1937). Use when native Read/Write is wrong because you need (a) cross-session retrieval by semantic similarity (vector embeddings) not by file path, (b) namespacing across projects without managing directory layout, or (c) the .swarm/memory.db audit trail. 
For one-shot file I/O, native Read/Write is fine.', category: 'memory', inputSchema: { type: 'object', properties: { allProjects: { type: 'boolean', description: 'Import from all Claude projects (default: current project only)' }, namespace: { type: 'string', description: 'Target namespace (default: "claude-memories")' }, projectPath: { type: 'string', description: '#1883 — explicit project path to hash, used when cwd does not match Claude Code\'s view (e.g. WSL bridge to Windows host). Pass the canonical project root as Claude Code sees it.' }, excludeFilePatterns: { type: 'array', items: { type: 'string' }, description: '#1937 — glob patterns matched against the absolute file path. Files matching ANY pattern are skipped. Supports `*` (any chars within a path segment), `**` (any chars including separators), and `?` (single char). Examples: `**/voice-*.md`, `**/persona-*.md`. Combine with excludeFiles for explicit paths.', }, excludeFiles: { type: 'array', items: { type: 'string' }, description: '#1937 — absolute file paths to skip verbatim. Faster than a pattern when the list is known ahead of time (operator captured baselines). Combine with excludeFilePatterns.', }, }, }, handler: async (input) => { await ensureInitialized(); const { storeEntry } = await getMemoryFunctions(); const ns = input.namespace || 'claude-memories'; if (input.namespace) { const vNs = validateIdentifier(ns, 'namespace'); if (!vNs.valid) return { success: false, imported: 0, error: vNs.error }; } const allProjects = input.allProjects; const projectPathOverride = input.projectPath; const claudeProjectsDir = join(homedir(), '.claude', 'projects'); // #1937 — voice-fidelity / persona-restricted exclusion. const excludeFilePatterns = Array.isArray(input.excludeFilePatterns) ? input.excludeFilePatterns : []; const excludeFilesList = Array.isArray(input.excludeFiles) ? 
new Set(input.excludeFiles) : new Set(); const excludeRegexes = excludeFilePatterns.map(globToRegex); const isExcluded = (absPath) => { if (excludeFilesList.has(absPath)) return true; return excludeRegexes.some(re => re.test(absPath)); }; // Find memory files const memoryFiles = []; let excludedByPattern = 0; if (allProjects) { // Scan all projects if (existsSync(claudeProjectsDir)) { try { for (const project of readdirSync(claudeProjectsDir, { withFileTypes: true })) { if (!project.isDirectory()) continue; const memDir = join(claudeProjectsDir, project.name, 'memory'); if (!existsSync(memDir)) continue; for (const file of readdirSync(memDir).filter((f) => f.endsWith('.md'))) { const absPath = join(memDir, file); if (isExcluded(absPath)) { excludedByPattern++; continue; } memoryFiles.push({ path: absPath, project: project.name, file }); } } } catch { /* scan error */ } } } else { // #1883 — current project: try multiple candidate hashes (POSIX, WSL-translated, // leading-dash-stripped, space-replaced). Caller can pass projectPath to override. const resolved = resolveProjectMemoryDir(claudeProjectsDir, projectPathOverride); if (resolved) { try { for (const file of readdirSync(resolved.memDir).filter((f) => f.endsWith('.md'))) { const absPath = join(resolved.memDir, file); if (isExcluded(absPath)) { excludedByPattern++; continue; } memoryFiles.push({ path: absPath, project: resolved.projectHash, file }); } } catch { /* scan error */ } } } if (memoryFiles.length === 0) { return { success: true, imported: 0, message: 'No Claude memory files found' }; } let imported = 0; let skipped = 0; // #1791.8 — Claude Code's `~/.claude/projects/` accumulates historical // project_id directories (truncated forms, sandbox cwds, renamed // workspaces) that all contain copies of the same memory files. The // previous import indexed each copy under a different `project_id` // prefix, producing 5–8x duplication on long-lived homes. 
Dedupe by // file content hash so the same memory is imported once even if it // appears under several project directories. const seenContentHashes = new Set(); let duplicatesSkipped = 0; const projects = new Set(); for (const memFile of memoryFiles) { projects.add(memFile.project); try { const content = readFileSync(memFile.path, 'utf-8'); // #1791.8 — Skip if we've already imported this exact content under // a different project_id directory. const contentHash = createHash('sha256').update(content).digest('hex').slice(0, 16); if (seenContentHashes.has(contentHash)) { duplicatesSkipped++; continue; } seenContentHashes.add(contentHash); const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/); let name = memFile.file.replace('.md', ''); let body = content; if (frontmatterMatch) { const yaml = frontmatterMatch[1]; body = frontmatterMatch[2].trim(); const nameMatch = yaml.match(/^name:\s*(.+)$/m); if (nameMatch) name = nameMatch[1].trim(); } // Split into sections for granular search const sections = body.split(/^(?=## )/m).filter(s => s.trim().length > 20); if (sections.length === 0 && body.length > 10) { // #1884 — sanitize key so memory_delete can later remove it. Without // this, dangerous chars from frontmatter `name` strand the key. const key = sanitizeMemoryKey(`claude:${memFile.project}:${name}`); await storeEntry({ key, value: body.slice(0, 4096), namespace: ns, generateEmbeddingFlag: true }); imported++; } else { for (const section of sections) { const titleMatch = section.match(/^##\s+(.+)/); const sectionTitle = titleMatch ? titleMatch[1].trim() : name; const sectionBody = section.replace(/^##\s+.+\n/, '').trim(); if (sectionBody.length < 10) continue; // #1884 — sanitize so any dangerous chars in the heading don't // produce keys memory_delete will reject. 
const key = sanitizeMemoryKey(`claude:${memFile.project}:${name}:${sectionTitle.slice(0, 50)}`); await storeEntry({ key, value: sectionBody.slice(0, 4096), namespace: ns, generateEmbeddingFlag: true }); imported++; } } } catch { skipped++; } } // AUDIT #3: report the embedding backend truthfully — a hash-fallback // import is NOT semantically searchable, so an operator must not read // "ONNX ... (384-dim)" when the vectors are mock. let importBackend = 'unknown'; try { const { generateEmbedding } = await import('../memory/memory-initializer.js'); const probe = await generateEmbedding('memory_import_claude backend probe'); importBackend = probe.backend ?? 'unknown'; } catch { /* probe failed — leave 'unknown' */ } return { success: true, imported, skipped, duplicatesSkipped, excludedByPattern, files: memoryFiles.length, projects: projects.size, namespace: ns, embedding: `all-MiniLM-L6-v2 (384-dim, backend=${importBackend})`, embeddingBackend: importBackend, }; }, }, { name: 'memory_bridge_status', description: 'Show Claude Code memory bridge status — AgentDB vectors, SONA learning, intelligence patterns, and connection health. Use when native Read/Write is wrong because you need (a) cross-session retrieval by semantic similarity (vector embeddings) not by file path, (b) namespacing across projects without managing directory layout, or (c) the .swarm/memory.db audit trail. 
For one-shot file I/O, native Read/Write is fine.', category: 'memory', inputSchema: { type: 'object', properties: {} }, handler: async () => { await ensureInitialized(); // Count Claude memory files const claudeProjectsDir = join(homedir(), '.claude', 'projects'); let claudeFiles = 0; let claudeProjects = 0; if (existsSync(claudeProjectsDir)) { try { for (const project of readdirSync(claudeProjectsDir, { withFileTypes: true })) { if (!project.isDirectory()) continue; const memDir = join(claudeProjectsDir, project.name, 'memory'); if (!existsSync(memDir)) continue; const files = readdirSync(memDir).filter((f) => f.endsWith('.md')); if (files.length > 0) { claudeProjects++; claudeFiles += files.length; } } } catch { /* ignore */ } } // AgentDB status // #1940: previously used `allEntries.entries.length` for the totals, // but `listEntries({})` returns the first 20 entries with a separate // `total` field for the full row count. So `memory_bridge_status` // reported `totalEntries: 0`...20 even when the DB had hundreds of // rows. Use `.total` for the count, and surface the namespaces with // entries so the report matches what's actually in the store. let agentdbEntries = 0; let claudeMemoryEntries = 0; const namespaceCounts = {}; try { const { listEntries } = await getMemoryFunctions(); const allEntries = await listEntries({}); agentdbEntries = allEntries?.total ?? allEntries?.entries?.length ?? 0; const claudeEntries = await listEntries({ namespace: 'claude-memories' }); claudeMemoryEntries = claudeEntries?.total ?? claudeEntries?.entries?.length ?? 0; // Per-namespace counts for the namespaces the reporter referenced // (#1940). Best-effort — a namespace with 0 entries is omitted. for (const ns of ['default', 'patterns', 'claude-memories', 'auto-memory', 'tasks', 'feedback', 'pretrain']) { try { const r = await listEntries({ namespace: ns }); const t = r?.total ?? r?.entries?.length ?? 
0; if (t > 0) namespaceCounts[ns] = t; } catch { /* skip per-namespace failure */ } } } catch { /* ignore */ } // Intelligence status let intelligence = { sonaEnabled: false, patternsLearned: 0, trajectoriesRecorded: 0 }; try { const int = await import('../memory/intelligence.js'); const stats = int.getIntelligenceStats?.(); if (stats) intelligence = { sonaEnabled: stats.sonaEnabled, patternsLearned: stats.patternsLearned, trajectoriesRecorded: stats.trajectoriesRecorded }; } catch { /* not initialized */ } // AUDIT #3: probe the embedding backend so operators can tell real ONNX // output from the deterministic hash fallback (which has inverted/ // meaningless semantics). Without this, the status string reports the // model name unconditionally and mock output is indistinguishable. let embeddingBackend = 'unknown'; try { const { generateEmbedding } = await import('../memory/memory-initializer.js'); const probe = await generateEmbedding('memory_bridge_status backend probe'); embeddingBackend = probe.backend ?? 'unknown'; } catch { /* probe failed — leave 'unknown' */ } const embeddingLabel = `all-MiniLM-L6-v2 (384-dim, backend=${embeddingBackend})`; return { claudeCode: { memoryFiles: claudeFiles, projects: claudeProjects }, agentdb: { totalEntries: agentdbEntries, claudeMemoryEntries, namespaces: namespaceCounts, backend: embeddingBackend === 'mock' ? 'sql.js + MOCK (hash fallback)' : 'sql.js + ONNX', embeddingBackend, }, intelligence, // #1940: report 'connected' whenever ANY namespace has imported // content, not just `claude-memories` — the bridge can be in active // use from other import paths (e.g. plugin namespaces, task memory). bridge: { status: agentdbEntries > 0 ? 'connected' : 'not-synced', embedding: embeddingLabel, embeddingBackend, }, }; }, }, { name: 'memory_search_unified', description: 'Search across both Claude Code memories and AgentDB entries using semantic vector similarity. Returns merged, deduplicated results from all namespaces. 
Use when native Read/Write is wrong because you need (a) cross-session retrieval by semantic similarity (vector embeddings) not by file path, (b) namespacing across projects without managing directory layout, or (c) the .swarm/memory.db audit trail. For one-shot file I/O, native Read/Write is fine.', category: 'memory', inputSchema: { type: 'object', properties: { query: { type: 'string', description: 'Search query (natural language)' }, limit: { type: 'number', description: 'Max results (default: 10)' }, namespace: { type: 'string', description: 'Filter to namespace (omit for all)' }, }, required: ['query'], }, handler: async (input) => { await ensureInitialized(); const { searchEntries } = await getMemoryFunctions(); validateMemoryInput(undefined, undefined, input.query); const query = input.query; const limit = input.limit ?? 10; const ns = input.namespace; if (ns) { const vNs = validateIdentifier(ns, 'namespace'); if (!vNs.valid) return { success: false, query, results: [], total: 0, error: vNs.error }; } // Search all namespaces unless filtered const namespaces = ns ? [ns] : ['default', 'claude-memories', 'auto-memory', 'patterns', 'tasks', 'feedback']; const allResults = []; for (const searchNs of namespaces) { try { const r = await searchEntries({ query, namespace: searchNs, limit: limit * 2 }); if (r?.results) { for (const entry of r.results) { allResults.push({ key: entry.key || entry.id || '', content: (entry.content || entry.value || '').toString().slice(0, 200), score: entr