claude-flow
Version:
Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration
1,022 lines • 63.5 kB
JavaScript
/**
* Memory MCP Tools for CLI - V3 with sql.js/HNSW Backend
*
* UPGRADED: Now uses the advanced sql.js + HNSW backend for:
* - 150x-12,500x faster semantic search
* - Vector embeddings with cosine similarity
* - Persistent SQLite storage (WASM)
* - Backward compatible with legacy JSON storage (auto-migrates)
*
* @module v3/cli/mcp-tools/memory-tools
*/
import { existsSync, mkdirSync, readdirSync, readFileSync, unlinkSync, writeFileSync } from 'fs';
import { homedir } from 'os';
import { join, resolve } from 'path';
import { createHash } from 'crypto';
import { validateIdentifier } from './validate-input.js';
// #1604: Align with memory-initializer.ts — single source of truth is .swarm/memory.db
const MEMORY_DIR = '.swarm';
const LEGACY_MEMORY_FILE = 'store.json';
const LEGACY_MEMORY_DIR = '.claude-flow/memory';
const MIGRATION_MARKER = '.migrated-to-sqlite';
function getMemoryDir() {
return resolve(MEMORY_DIR);
}
function getLegacyPath() {
return resolve(join(MEMORY_DIR, LEGACY_MEMORY_FILE));
}
function getMigrationMarkerPath() {
return resolve(join(MEMORY_DIR, MIGRATION_MARKER));
}
function ensureMemoryDir() {
const dir = getMemoryDir();
if (!existsSync(dir)) {
mkdirSync(dir, { recursive: true });
}
}
// D-2: Input bounds for memory parameters
const MAX_KEY_LENGTH = 1024;
const MAX_VALUE_SIZE = 1024 * 1024; // 1MB
const MAX_QUERY_LENGTH = 4096;
// #1425 — single source of truth for the dangerous-character set rejected by
// validateMemoryInput. Imported by sanitizeMemoryKey so write-side sanitization
// and read-side rejection can never drift apart (the symmetry bug behind #1884).
const DANGEROUS_KEY_CHARS = /[;&|`$(){}[\]<>!#\\\0]|\.\.[/\\]/g;
const DANGEROUS_KEY_PATTERN = /[;&|`$(){}[\]<>!#\\\0]|\.\.[/\\]/;
function validateMemoryInput(key, value, query, namespace) {
if (key && key.length > MAX_KEY_LENGTH) {
throw new Error(`Key exceeds maximum length of ${MAX_KEY_LENGTH} characters`);
}
if (value && value.length > MAX_VALUE_SIZE) {
throw new Error(`Value exceeds maximum size of ${MAX_VALUE_SIZE} bytes`);
}
if (query && query.length > MAX_QUERY_LENGTH) {
throw new Error(`Query exceeds maximum length of ${MAX_QUERY_LENGTH} characters`);
}
// Reject path traversal and shell metacharacters in keys/namespaces (#1425)
if (key && DANGEROUS_KEY_PATTERN.test(key)) {
throw new Error('Key contains disallowed characters');
}
if (namespace && DANGEROUS_KEY_PATTERN.test(namespace)) {
throw new Error('Namespace contains disallowed characters');
}
}
// #1884 — sanitize a key produced from arbitrary input (markdown headings,
// frontmatter names, file names) so it survives validateMemoryInput on the
// read/delete path. Replaces every dangerous char with `_`. Truncates to
// MAX_KEY_LENGTH so the bound check in validateMemoryInput also passes.
// Keep this in sync with DANGEROUS_KEY_PATTERN — they share DANGEROUS_KEY_CHARS.
function sanitizeMemoryKey(key) {
const safe = key.replace(DANGEROUS_KEY_CHARS, '_');
return safe.length > MAX_KEY_LENGTH ? safe.slice(0, MAX_KEY_LENGTH) : safe;
}
// #1937 — minimal glob → RegExp helper for memory_import_claude exclusion
// patterns. Anchored. Supports the three operators the issue's voice-fidelity
// workflow needs:
// `**` — any chars including path separators
// `*` — any chars except path separators
// `?` — exactly one char except a path separator
// Everything else is regex-escaped. Used to match absolute file paths.
function globToRegex(pattern) {
// Tokenize so we can replace `**` before `*` without overlap.
let out = '';
for (let i = 0; i < pattern.length; i++) {
const c = pattern[i];
if (c === '*' && pattern[i + 1] === '*') {
out += '.*';
i++;
}
else if (c === '*') {
out += '[^/\\\\]*';
}
else if (c === '?') {
out += '[^/\\\\]';
}
else if (/[.+^$|(){}\[\]\\]/.test(c)) {
out += '\\' + c;
}
else {
out += c;
}
}
return new RegExp('^' + out + '$');
}
// #1883 — resolve the Claude-Code project memory directory for the *current*
// project. Claude Code hashes the project path differently per host OS, and
// our previous logic only POSIX-slash-replaced cwd, which breaks for:
// - WSL bridges where cwd is `/mnt/<drive>/...` but Claude Code is on Windows
// - paths containing spaces (Claude Code replaces spaces with dashes)
// - any leading slash on POSIX (Claude Code strips it)
// Strategy: try several candidate hashes and return the first one with a
// memory dir that exists. An explicit `projectPathOverride` short-circuits
// the heuristics for callers that know the canonical project path.
function resolveProjectMemoryDir(claudeProjectsDir, projectPathOverride) {
const candidates = new Set();
const sources = [];
if (projectPathOverride && projectPathOverride.length > 0) {
sources.push(projectPathOverride);
}
else {
sources.push(process.cwd());
}
for (const source of sources) {
// Candidate 1: legacy POSIX hash — what shipped before #1883
candidates.add(source.replace(/\//g, '-'));
// Candidate 2: WSL `/mnt/<drive>/...` translated to Claude-Code Windows hash
// e.g. `/mnt/c/Users/x/Project Name` → `C--Users-x-Project-Name`
const wsl = source.match(/^\/mnt\/([a-z])(\/.*)?$/i);
if (wsl) {
const drive = wsl[1].toUpperCase();
const rest = (wsl[2] ?? '').replace(/\//g, '-').replace(/ /g, '-');
candidates.add(`${drive}-${rest}`);
}
// Candidate 3: POSIX hash with leading dash stripped (Claude Code on macOS/Linux)
const stripped = source.replace(/\//g, '-').replace(/^-+/, '');
candidates.add(stripped);
// Candidate 4: spaces replaced with dashes (Claude Code's space rule)
candidates.add(source.replace(/\//g, '-').replace(/ /g, '-'));
// Candidate 5 (#1939): native Win32 path on a Win32 Claude Code install.
// `C:\Users\tobia\OneDrive\Desktop\Claude Stuff` →
// `C--Users-tobia-OneDrive-Desktop-Claude-Stuff`. Claude Code's on-disk
// slug replaces drive-colon AND backslashes AND whitespace with `-`.
// The earlier candidates only handled forward slashes, so a Win32+Win32
// setup never matched.
if (/^[A-Za-z]:[\\/]/.test(source)) {
candidates.add(source.replace(/[:\\/]/g, '-').replace(/\s+/g, '-'));
}
}
for (const projectHash of candidates) {
const memDir = join(claudeProjectsDir, projectHash, 'memory');
if (existsSync(memDir))
return { memDir, projectHash };
}
return null;
}
/**
* Check if legacy JSON store exists in old .claude-flow/memory/ location
*/
function hasLegacyStore() {
const legacyPath = resolve(join(LEGACY_MEMORY_DIR, LEGACY_MEMORY_FILE));
const migrationMarker = resolve(join(LEGACY_MEMORY_DIR, MIGRATION_MARKER));
return existsSync(legacyPath) && !existsSync(migrationMarker);
}
/**
* Load legacy JSON store for migration
*/
function loadLegacyStore() {
try {
const legacyPath = resolve(join(LEGACY_MEMORY_DIR, LEGACY_MEMORY_FILE));
if (existsSync(legacyPath)) {
const data = readFileSync(legacyPath, 'utf-8');
return JSON.parse(data);
}
}
catch {
// Return null on error
}
return null;
}
/**
* Mark migration as complete
*/
function markMigrationComplete() {
const legacyDir = resolve(LEGACY_MEMORY_DIR);
if (!existsSync(legacyDir))
mkdirSync(legacyDir, { recursive: true });
writeFileSync(resolve(join(LEGACY_MEMORY_DIR, MIGRATION_MARKER)), JSON.stringify({
migratedAt: new Date().toISOString(),
version: '3.0.0',
}), 'utf-8');
}
/**
* Lazy-load memory initializer functions to avoid circular deps
*/
async function getMemoryFunctions() {
const { storeEntry, searchEntries, listEntries, getEntry, deleteEntry, initializeMemoryDatabase, checkMemoryInitialization, } = await import('../memory/memory-initializer.js');
return {
storeEntry,
searchEntries,
listEntries,
getEntry,
deleteEntry,
initializeMemoryDatabase,
checkMemoryInitialization,
};
}
/**
* Ensure memory database is initialized and migrate legacy data if needed.
* #1606: Wrapped in try/catch to prevent process-level crashes that kill
* the stdio MCP transport on Windows/Codex.
*/
async function ensureInitialized() {
try {
const { initializeMemoryDatabase, checkMemoryInitialization, storeEntry } = await getMemoryFunctions();
// Check if already initialized
const status = await checkMemoryInitialization();
if (!status.initialized) {
await initializeMemoryDatabase({ force: false, verbose: false });
}
// Migrate legacy JSON data if exists (from old .claude-flow/memory/ location)
if (hasLegacyStore()) {
const legacyStore = loadLegacyStore();
if (legacyStore && Object.keys(legacyStore.entries).length > 0) {
console.error('[MCP Memory] Migrating legacy JSON store to sql.js...');
let migrated = 0;
for (const [key, entry] of Object.entries(legacyStore.entries)) {
try {
const value = typeof entry.value === 'string' ? entry.value : JSON.stringify(entry.value);
await storeEntry({
key,
value,
namespace: 'default',
generateEmbeddingFlag: true,
});
migrated++;
}
catch (e) {
console.error(`[MCP Memory] Failed to migrate key "${key}":`, e);
}
}
console.error(`[MCP Memory] Migrated ${migrated}/${Object.keys(legacyStore.entries).length} entries`);
markMigrationComplete();
}
}
}
catch (error) {
console.error('[MCP Memory] Initialization failed:', error instanceof Error ? error.message : error);
}
}
export const memoryTools = [
{
name: 'memory_store',
description: 'Persistent key-value store with vector embedding — survives across sessions and is searchable by meaning, not just by file path. Use when native Write is wrong because the data is not a file (e.g. a learned pattern, a decision, a budget config) AND you need to recall it later by semantic query, not by path. Defaults to namespace="default"; pass --upsert=true to update an existing key.',
category: 'memory',
inputSchema: {
type: 'object',
properties: {
key: { type: 'string', description: 'Memory key (unique within namespace)' },
value: { description: 'Value to store (string or object)' },
namespace: { type: 'string', description: 'Namespace for organization (default: "default")' },
tags: {
type: 'array',
items: { type: 'string' },
description: 'Optional tags for filtering',
},
ttl: { type: 'number', description: 'Time-to-live in seconds (optional)' },
upsert: { type: 'boolean', description: 'If true, update existing key instead of failing (default: false)' },
},
required: ['key', 'value'],
},
handler: async (input) => {
await ensureInitialized();
const { storeEntry } = await getMemoryFunctions();
const key = input.key;
const namespace = input.namespace || 'default';
const rawValue = input.value;
const value = typeof rawValue === 'string' ? rawValue : (rawValue !== undefined ? JSON.stringify(rawValue) : '');
const tags = input.tags || [];
const ttl = input.ttl;
const upsert = input.upsert || false;
if (!value) {
return {
success: false,
key,
stored: false,
hasEmbedding: false,
error: 'Value is required and cannot be empty',
};
}
validateMemoryInput(key, value, undefined, namespace);
const startTime = performance.now();
try {
const result = await storeEntry({
key,
value,
namespace,
generateEmbeddingFlag: true,
tags,
ttl,
upsert,
});
const duration = performance.now() - startTime;
return {
success: result.success,
key,
namespace,
stored: result.success,
storedAt: new Date().toISOString(),
hasEmbedding: !!result.embedding,
embeddingDimensions: result.embedding?.dimensions || null,
backend: 'sql.js + HNSW',
storeTime: `${duration.toFixed(2)}ms`,
error: result.error,
};
}
catch (error) {
return {
success: false,
key,
error: error instanceof Error ? error.message : 'Unknown error',
};
}
},
},
{
name: 'memory_retrieve',
description: 'Read back a value previously stored via memory_store, by exact (namespace, key) — lossless, includes metadata. Use when native Read is wrong because the value is not a file (it lives in the .swarm/memory.db SQLite store) AND you know the exact key. For semantic lookup by meaning, use memory_search.',
category: 'memory',
inputSchema: {
type: 'object',
properties: {
key: { type: 'string', description: 'Memory key' },
namespace: { type: 'string', description: 'Namespace (default: "default")' },
},
required: ['key'],
},
handler: async (input) => {
await ensureInitialized();
const { getEntry } = await getMemoryFunctions();
const key = input.key;
const namespace = input.namespace || 'default';
validateMemoryInput(key, undefined, undefined, namespace);
try {
const result = await getEntry({ key, namespace });
if (result.found && result.entry) {
// Try to parse JSON value
let value = result.entry.content;
try {
value = JSON.parse(result.entry.content);
}
catch {
// Keep as string
}
return {
key,
namespace,
value,
tags: result.entry.tags,
storedAt: result.entry.createdAt,
updatedAt: result.entry.updatedAt,
accessCount: result.entry.accessCount,
hasEmbedding: result.entry.hasEmbedding,
found: true,
backend: 'sql.js + HNSW',
};
}
return {
key,
namespace,
value: null,
found: false,
};
}
catch (error) {
return {
key,
namespace,
value: null,
found: false,
error: error instanceof Error ? error.message : 'Unknown error',
};
}
},
},
{
name: 'memory_search',
description: 'Find stored memories by meaning (vector similarity), not by literal text — finds "JWT auth pattern" when you query "token-based login flow". Use when native Grep is wrong because Grep matches characters and you need to find conceptually-related entries across past sessions. Backed by HNSW index over ONNX embeddings; returns top-k with similarity scores. Pair with smart=true for query expansion + MMR diversity.',
category: 'memory',
inputSchema: {
type: 'object',
properties: {
query: { type: 'string', description: 'Search query (semantic similarity)' },
namespace: { type: 'string', description: 'Namespace to search (default: "default")' },
limit: { type: 'number', description: 'Maximum results (default: 10)' },
threshold: { type: 'number', description: 'Minimum similarity threshold 0-1 (default: 0.3)' },
smart: { type: 'boolean', description: 'Enable SmartRetrieval pipeline — query expansion, RRF fusion, recency boost, MMR diversity (default: false)' },
},
required: ['query'],
},
handler: async (input) => {
await ensureInitialized();
const { searchEntries } = await getMemoryFunctions();
const query = input.query;
const namespace = input.namespace || 'default';
const limit = input.limit ?? 10;
const threshold = input.threshold ?? 0.3;
validateMemoryInput(undefined, undefined, query);
const startTime = performance.now();
try {
// #1846: feature-detect smartSearch on the resolved memory package.
// The export landed in @claude-flow/memory@>3.0.0-alpha.14 — older
// installs pin to a build that exposes search/store/retrieve but
// not smartSearch. Throwing `is not a function` is hostile; instead
// detect at runtime and gracefully fall through to plain semantic
// search with an explicit fallback note.
let smartFallbackReason;
if (input.smart) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
let memMod;
try {
memMod = await import('@claude-flow/memory');
}
catch (err) {
smartFallbackReason = `@claude-flow/memory failed to load: ${err.message}`;
}
const smartSearch = memMod && typeof memMod.smartSearch === 'function'
? memMod.smartSearch
: undefined;
if (smartSearch) {
// SmartRetrieval pipeline (ADR-090)
const rawSearch = async (req) => {
const r = await searchEntries({
query: req.query,
namespace: req.namespace || namespace,
limit: req.limit || limit * 3,
threshold: req.threshold ?? threshold,
});
return {
results: r.results.map(e => ({
id: e.id,
key: e.key,
content: e.content,
score: e.score,
namespace: e.namespace,
})),
};
};
const smartResult = await smartSearch(rawSearch, {
query,
namespace,
limit,
threshold,
});
const duration = performance.now() - startTime;
const results = smartResult.results.map((r) => {
let value = r.content;
try {
value = JSON.parse(r.content);
}
catch { /* keep as string */ }
return {
key: r.key,
namespace: r.namespace,
value,
similarity: r.score,
};
});
return {
query,
results,
total: results.length,
searchTime: `${duration.toFixed(2)}ms`,
backend: 'SmartRetrieval (RRF + MMR + Recency)',
stats: smartResult.stats,
};
}
// smart=true but smartSearch unavailable on installed package.
// Fall through to plain search with an explicit warning.
smartFallbackReason = smartFallbackReason
?? 'smartSearch is not exported by the installed @claude-flow/memory build (likely a release lag — see #1846). Falling back to standard semantic search.';
}
// Original non-smart path (unchanged) — also reached when smart was
// requested but unavailable. We attach `smartFallback` to the
// response so callers can see the degradation explicitly.
const result = await searchEntries({
query,
namespace,
limit,
threshold,
});
const duration = performance.now() - startTime;
// Parse JSON values in results
const results = result.results.map(r => {
let value = r.content;
try {
value = JSON.parse(r.content);
}
catch {
// Keep as string
}
return {
key: r.key,
namespace: r.namespace,
value,
similarity: r.score,
};
});
return {
query,
results,
total: results.length,
searchTime: `${duration.toFixed(2)}ms`,
backend: 'HNSW + sql.js',
...(smartFallbackReason ? { smartFallback: smartFallbackReason } : {}),
};
}
catch (error) {
return {
query,
results: [],
total: 0,
error: error instanceof Error ? error.message : 'Unknown error',
};
}
},
},
{
name: 'memory_delete',
description: 'Remove a stored memory entry by exact (namespace, key). Use when a previously stored decision is invalidated or contains stale data. No native equivalent — Write to a file does not affect the .swarm/memory.db SQLite store.',
category: 'memory',
inputSchema: {
type: 'object',
properties: {
key: { type: 'string', description: 'Memory key' },
namespace: { type: 'string', description: 'Namespace (default: "default")' },
},
required: ['key'],
},
handler: async (input) => {
await ensureInitialized();
const { deleteEntry } = await getMemoryFunctions();
const key = input.key;
const namespace = input.namespace || 'default';
validateMemoryInput(key, undefined, undefined, namespace);
try {
const result = await deleteEntry({ key, namespace });
return {
success: result.deleted,
key,
namespace,
deleted: result.deleted,
hnswIndexInvalidated: result.deleted,
backend: 'sql.js + HNSW',
};
}
catch (error) {
return {
success: false,
key,
namespace,
deleted: false,
error: error instanceof Error ? error.message : 'Unknown error',
};
}
},
},
{
name: 'memory_list',
description: 'Enumerate stored memory entries (optionally filtered by namespace/tags) without semantic search. Use when native Glob is wrong because the entries are not files (they live in .swarm/memory.db). For inspection / audit / "what is in my memory" — pair with memory_search for retrieval-by-meaning.',
category: 'memory',
inputSchema: {
type: 'object',
properties: {
namespace: { type: 'string', description: 'Filter by namespace' },
limit: { type: 'number', description: 'Maximum results (default: 50)' },
offset: { type: 'number', description: 'Offset for pagination (default: 0)' },
},
},
handler: async (input) => {
await ensureInitialized();
const { listEntries } = await getMemoryFunctions();
const namespace = input.namespace;
const limit = input.limit || 50;
const offset = input.offset || 0;
if (namespace) {
const vNs = validateIdentifier(namespace, 'namespace');
if (!vNs.valid)
throw new Error(vNs.error);
}
try {
const result = await listEntries({
namespace,
limit,
offset,
});
const entries = result.entries.map(e => ({
key: e.key,
namespace: e.namespace,
storedAt: e.createdAt,
updatedAt: e.updatedAt,
accessCount: e.accessCount,
hasEmbedding: e.hasEmbedding,
size: e.size,
}));
return {
entries,
total: result.total,
limit,
offset,
backend: 'sql.js + HNSW',
};
}
catch (error) {
return {
entries: [],
total: 0,
limit,
offset,
error: error instanceof Error ? error.message : 'Unknown error',
};
}
},
},
{
name: 'memory_stats',
description: 'Get memory storage statistics including HNSW index status Use when native Read/Write is wrong because you need (a) cross-session retrieval by semantic similarity (vector embeddings) not by file path, (b) namespacing across projects without managing directory layout, or (c) the .swarm/memory.db audit trail. For one-shot file I/O, native Read/Write is fine.',
category: 'memory',
inputSchema: {
type: 'object',
properties: {},
},
handler: async () => {
await ensureInitialized();
const { checkMemoryInitialization, listEntries } = await getMemoryFunctions();
try {
const status = await checkMemoryInitialization();
const allEntries = await listEntries({ limit: 100000 });
// Count by namespace
const namespaces = {};
let withEmbeddings = 0;
for (const entry of allEntries.entries) {
namespaces[entry.namespace] = (namespaces[entry.namespace] || 0) + 1;
if (entry.hasEmbedding)
withEmbeddings++;
}
return {
initialized: status.initialized,
totalEntries: allEntries.total,
entriesWithEmbeddings: withEmbeddings,
embeddingCoverage: allEntries.total > 0
? `${((withEmbeddings / allEntries.total) * 100).toFixed(1)}%`
: '0%',
namespaces,
backend: 'sql.js + HNSW',
version: status.version || '3.0.0',
features: status.features || {
vectorEmbeddings: true,
hnswIndex: true,
semanticSearch: true,
},
};
}
catch (error) {
return {
initialized: false,
error: error instanceof Error ? error.message : 'Unknown error',
};
}
},
},
{
name: 'memory_migrate',
description: 'Manually trigger migration from legacy JSON store to sql.js Use when native Read/Write is wrong because you need (a) cross-session retrieval by semantic similarity (vector embeddings) not by file path, (b) namespacing across projects without managing directory layout, or (c) the .swarm/memory.db audit trail. For one-shot file I/O, native Read/Write is fine.',
category: 'memory',
inputSchema: {
type: 'object',
properties: {
force: { type: 'boolean', description: 'Force re-migration even if already done' },
},
},
handler: async (input) => {
const force = input.force;
// Remove migration marker if forcing
if (force) {
const markerPath = getMigrationMarkerPath();
if (existsSync(markerPath)) {
unlinkSync(markerPath);
}
}
// Check for legacy data
const legacyStore = loadLegacyStore();
if (!legacyStore || Object.keys(legacyStore.entries).length === 0) {
return {
success: true,
message: 'No legacy data to migrate',
migrated: 0,
};
}
// Run migration via ensureInitialized
await ensureInitialized();
return {
success: true,
message: 'Migration completed',
migrated: Object.keys(legacyStore.entries).length,
backend: 'sql.js + HNSW',
};
},
},
// ===== Claude Code Memory Bridge Tools =====
{
name: 'memory_import_claude',
description: 'Import Claude Code auto-memory files into AgentDB with ONNX vector embeddings. Reads ~/.claude/projects/*/memory/*.md files, parses YAML frontmatter, splits into sections, and stores with 384-dim embeddings for semantic search. Use allProjects=true to import from ALL Claude projects. Pass projectPath to override cwd-based detection (#1883 — required when Ruflo runs in WSL but Claude Code is on Windows). Pass excludeFilePatterns (glob list) or excludeFiles (absolute path list) to skip voice-load-bearing, PII, or persona-restricted files (#1937). Use when native Read/Write is wrong because you need (a) cross-session retrieval by semantic similarity (vector embeddings) not by file path, (b) namespacing across projects without managing directory layout, or (c) the .swarm/memory.db audit trail. For one-shot file I/O, native Read/Write is fine.',
category: 'memory',
inputSchema: {
type: 'object',
properties: {
allProjects: { type: 'boolean', description: 'Import from all Claude projects (default: current project only)' },
namespace: { type: 'string', description: 'Target namespace (default: "claude-memories")' },
projectPath: { type: 'string', description: '#1883 — explicit project path to hash, used when cwd does not match Claude Code\'s view (e.g. WSL bridge to Windows host). Pass the canonical project root as Claude Code sees it.' },
excludeFilePatterns: {
type: 'array',
items: { type: 'string' },
description: '#1937 — glob patterns matched against the absolute file path. Files matching ANY pattern are skipped. Supports `*` (any chars within a path segment), `**` (any chars including separators), and `?` (single char). Examples: `**/voice-*.md`, `**/persona-*.md`. Combine with excludeFiles for explicit paths.',
},
excludeFiles: {
type: 'array',
items: { type: 'string' },
description: '#1937 — absolute file paths to skip verbatim. Faster than a pattern when the list is known ahead of time (operator captured baselines). Combine with excludeFilePatterns.',
},
},
},
handler: async (input) => {
await ensureInitialized();
const { storeEntry } = await getMemoryFunctions();
const ns = input.namespace || 'claude-memories';
if (input.namespace) {
const vNs = validateIdentifier(ns, 'namespace');
if (!vNs.valid)
return { success: false, imported: 0, error: vNs.error };
}
const allProjects = input.allProjects;
const projectPathOverride = input.projectPath;
const claudeProjectsDir = join(homedir(), '.claude', 'projects');
// #1937 — voice-fidelity / persona-restricted exclusion.
const excludeFilePatterns = Array.isArray(input.excludeFilePatterns) ? input.excludeFilePatterns : [];
const excludeFilesList = Array.isArray(input.excludeFiles) ? new Set(input.excludeFiles) : new Set();
const excludeRegexes = excludeFilePatterns.map(globToRegex);
const isExcluded = (absPath) => {
if (excludeFilesList.has(absPath))
return true;
return excludeRegexes.some(re => re.test(absPath));
};
// Find memory files
const memoryFiles = [];
let excludedByPattern = 0;
if (allProjects) {
// Scan all projects
if (existsSync(claudeProjectsDir)) {
try {
for (const project of readdirSync(claudeProjectsDir, { withFileTypes: true })) {
if (!project.isDirectory())
continue;
const memDir = join(claudeProjectsDir, project.name, 'memory');
if (!existsSync(memDir))
continue;
for (const file of readdirSync(memDir).filter((f) => f.endsWith('.md'))) {
const absPath = join(memDir, file);
if (isExcluded(absPath)) {
excludedByPattern++;
continue;
}
memoryFiles.push({ path: absPath, project: project.name, file });
}
}
}
catch { /* scan error */ }
}
}
else {
// #1883 — current project: try multiple candidate hashes (POSIX, WSL-translated,
// leading-dash-stripped, space-replaced). Caller can pass projectPath to override.
const resolved = resolveProjectMemoryDir(claudeProjectsDir, projectPathOverride);
if (resolved) {
try {
for (const file of readdirSync(resolved.memDir).filter((f) => f.endsWith('.md'))) {
const absPath = join(resolved.memDir, file);
if (isExcluded(absPath)) {
excludedByPattern++;
continue;
}
memoryFiles.push({ path: absPath, project: resolved.projectHash, file });
}
}
catch { /* scan error */ }
}
}
if (memoryFiles.length === 0) {
return { success: true, imported: 0, message: 'No Claude memory files found' };
}
let imported = 0;
let skipped = 0;
// #1791.8 — Claude Code's `~/.claude/projects/` accumulates historical
// project_id directories (truncated forms, sandbox cwds, renamed
// workspaces) that all contain copies of the same memory files. The
// previous import indexed each copy under a different `project_id`
// prefix, producing 5–8x duplication on long-lived homes. Dedupe by
// file content hash so the same memory is imported once even if it
// appears under several project directories.
const seenContentHashes = new Set();
let duplicatesSkipped = 0;
const projects = new Set();
for (const memFile of memoryFiles) {
projects.add(memFile.project);
try {
const content = readFileSync(memFile.path, 'utf-8');
// #1791.8 — Skip if we've already imported this exact content under
// a different project_id directory.
const contentHash = createHash('sha256').update(content).digest('hex').slice(0, 16);
if (seenContentHashes.has(contentHash)) {
duplicatesSkipped++;
continue;
}
seenContentHashes.add(contentHash);
const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/);
let name = memFile.file.replace('.md', '');
let body = content;
if (frontmatterMatch) {
const yaml = frontmatterMatch[1];
body = frontmatterMatch[2].trim();
const nameMatch = yaml.match(/^name:\s*(.+)$/m);
if (nameMatch)
name = nameMatch[1].trim();
}
// Split into sections for granular search
const sections = body.split(/^(?=## )/m).filter(s => s.trim().length > 20);
if (sections.length === 0 && body.length > 10) {
// #1884 — sanitize key so memory_delete can later remove it. Without
// this, dangerous chars from frontmatter `name` strand the key.
const key = sanitizeMemoryKey(`claude:${memFile.project}:${name}`);
await storeEntry({ key, value: body.slice(0, 4096), namespace: ns, generateEmbeddingFlag: true });
imported++;
}
else {
for (const section of sections) {
const titleMatch = section.match(/^##\s+(.+)/);
const sectionTitle = titleMatch ? titleMatch[1].trim() : name;
const sectionBody = section.replace(/^##\s+.+\n/, '').trim();
if (sectionBody.length < 10)
continue;
// #1884 — sanitize so any dangerous chars in the heading don't
// produce keys memory_delete will reject.
const key = sanitizeMemoryKey(`claude:${memFile.project}:${name}:${sectionTitle.slice(0, 50)}`);
await storeEntry({ key, value: sectionBody.slice(0, 4096), namespace: ns, generateEmbeddingFlag: true });
imported++;
}
}
}
catch {
skipped++;
}
}
// AUDIT #3: report the embedding backend truthfully — a hash-fallback
// import is NOT semantically searchable, so an operator must not read
// "ONNX ... (384-dim)" when the vectors are mock.
let importBackend = 'unknown';
try {
const { generateEmbedding } = await import('../memory/memory-initializer.js');
const probe = await generateEmbedding('memory_import_claude backend probe');
importBackend = probe.backend ?? 'unknown';
}
catch { /* probe failed — leave 'unknown' */ }
return {
success: true,
imported,
skipped,
duplicatesSkipped,
excludedByPattern,
files: memoryFiles.length,
projects: projects.size,
namespace: ns,
embedding: `all-MiniLM-L6-v2 (384-dim, backend=${importBackend})`,
embeddingBackend: importBackend,
};
},
},
{
name: 'memory_bridge_status',
description: 'Show Claude Code memory bridge status — AgentDB vectors, SONA learning, intelligence patterns, and connection health. Use when native Read/Write is wrong because you need (a) cross-session retrieval by semantic similarity (vector embeddings) not by file path, (b) namespacing across projects without managing directory layout, or (c) the .swarm/memory.db audit trail. For one-shot file I/O, native Read/Write is fine.',
category: 'memory',
inputSchema: { type: 'object', properties: {} },
handler: async () => {
await ensureInitialized();
// Count Claude memory files
const claudeProjectsDir = join(homedir(), '.claude', 'projects');
let claudeFiles = 0;
let claudeProjects = 0;
if (existsSync(claudeProjectsDir)) {
try {
for (const project of readdirSync(claudeProjectsDir, { withFileTypes: true })) {
if (!project.isDirectory())
continue;
const memDir = join(claudeProjectsDir, project.name, 'memory');
if (!existsSync(memDir))
continue;
const files = readdirSync(memDir).filter((f) => f.endsWith('.md'));
if (files.length > 0) {
claudeProjects++;
claudeFiles += files.length;
}
}
}
catch { /* ignore */ }
}
// AgentDB status
// #1940: previously used `allEntries.entries.length` for the totals,
// but `listEntries({})` returns the first 20 entries with a separate
// `total` field for the full row count. So `memory_bridge_status`
// reported `totalEntries: 0`...20 even when the DB had hundreds of
// rows. Use `.total` for the count, and surface the namespaces with
// entries so the report matches what's actually in the store.
let agentdbEntries = 0;
let claudeMemoryEntries = 0;
const namespaceCounts = {};
try {
const { listEntries } = await getMemoryFunctions();
const allEntries = await listEntries({});
agentdbEntries = allEntries?.total
?? allEntries?.entries?.length ?? 0;
const claudeEntries = await listEntries({ namespace: 'claude-memories' });
claudeMemoryEntries = claudeEntries?.total
?? claudeEntries?.entries?.length ?? 0;
// Per-namespace counts for the namespaces the reporter referenced
// (#1940). Best-effort — a namespace with 0 entries is omitted.
for (const ns of ['default', 'patterns', 'claude-memories', 'auto-memory', 'tasks', 'feedback', 'pretrain']) {
try {
const r = await listEntries({ namespace: ns });
const t = r?.total ?? r?.entries?.length ?? 0;
if (t > 0)
namespaceCounts[ns] = t;
}
catch { /* skip per-namespace failure */ }
}
}
catch { /* ignore */ }
// Intelligence status
let intelligence = { sonaEnabled: false, patternsLearned: 0, trajectoriesRecorded: 0 };
try {
const int = await import('../memory/intelligence.js');
const stats = int.getIntelligenceStats?.();
if (stats)
intelligence = { sonaEnabled: stats.sonaEnabled, patternsLearned: stats.patternsLearned, trajectoriesRecorded: stats.trajectoriesRecorded };
}
catch { /* not initialized */ }
// AUDIT #3: probe the embedding backend so operators can tell real ONNX
// output from the deterministic hash fallback (which has inverted/
// meaningless semantics). Without this, the status string reports the
// model name unconditionally and mock output is indistinguishable.
let embeddingBackend = 'unknown';
try {
const { generateEmbedding } = await import('../memory/memory-initializer.js');
const probe = await generateEmbedding('memory_bridge_status backend probe');
embeddingBackend = probe.backend ?? 'unknown';
}
catch { /* probe failed — leave 'unknown' */ }
const embeddingLabel = `all-MiniLM-L6-v2 (384-dim, backend=${embeddingBackend})`;
return {
claudeCode: { memoryFiles: claudeFiles, projects: claudeProjects },
agentdb: {
totalEntries: agentdbEntries,
claudeMemoryEntries,
namespaces: namespaceCounts,
backend: embeddingBackend === 'mock' ? 'sql.js + MOCK (hash fallback)' : 'sql.js + ONNX',
embeddingBackend,
},
intelligence,
// #1940: report 'connected' whenever ANY namespace has imported
// content, not just `claude-memories` — the bridge can be in active
// use from other import paths (e.g. plugin namespaces, task memory).
bridge: {
status: agentdbEntries > 0 ? 'connected' : 'not-synced',
embedding: embeddingLabel,
embeddingBackend,
},
};
},
},
{
name: 'memory_search_unified',
description: 'Search across both Claude Code memories and AgentDB entries using semantic vector similarity. Returns merged, deduplicated results from all namespaces. Use when native Read/Write is wrong because you need (a) cross-session retrieval by semantic similarity (vector embeddings) not by file path, (b) namespacing across projects without managing directory layout, or (c) the .swarm/memory.db audit trail. For one-shot file I/O, native Read/Write is fine.',
category: 'memory',
inputSchema: {
type: 'object',
properties: {
query: { type: 'string', description: 'Search query (natural language)' },
limit: { type: 'number', description: 'Max results (default: 10)' },
namespace: { type: 'string', description: 'Filter to namespace (omit for all)' },
},
required: ['query'],
},
handler: async (input) => {
await ensureInitialized();
const { searchEntries } = await getMemoryFunctions();
validateMemoryInput(undefined, undefined, input.query);
const query = input.query;
const limit = input.limit ?? 10;
const ns = input.namespace;
if (ns) {
const vNs = validateIdentifier(ns, 'namespace');
if (!vNs.valid)
return { success: false, query, results: [], total: 0, error: vNs.error };
}
// Search all namespaces unless filtered
const namespaces = ns ? [ns] : ['default', 'claude-memories', 'auto-memory', 'patterns', 'tasks', 'feedback'];
const allResults = [];
for (const searchNs of namespaces) {
try {
const r = await searchEntries({ query, namespace: searchNs, limit: limit * 2 });
if (r?.results) {
for (const entry of r.results) {
allResults.push({
key: entry.key || entry.id || '',
content: (entry.content || entry.value || '').toString().slice(0, 200),
score: entr