mem0ai

Version:

The Memory Layer For Your AI Apps

1 lines • 479 kB

Source Map (JSON)

{"version":3,"sources":["../../src/oss/src/memory/index.ts","../../src/oss/src/types/index.ts","../../src/oss/src/embeddings/openai.ts","../../src/oss/src/embeddings/ollama.ts","../../src/oss/src/utils/logger.ts","../../src/oss/src/embeddings/lmstudio.ts","../../src/oss/src/llms/openai.ts","../../src/oss/src/llms/openai_structured.ts","../../src/oss/src/llms/anthropic.ts","../../src/oss/src/llms/groq.ts","../../src/oss/src/llms/mistral.ts","../../src/oss/src/vector_stores/memory.ts","../../src/oss/src/utils/sqlite.ts","../../src/oss/src/vector_stores/qdrant.ts","../../src/oss/src/vector_stores/vectorize.ts","../../src/oss/src/vector_stores/redis.ts","../../src/oss/src/llms/ollama.ts","../../src/oss/src/llms/lmstudio.ts","../../src/oss/src/llms/deepseek.ts","../../src/oss/src/vector_stores/supabase.ts","../../src/oss/src/storage/SQLiteManager.ts","../../src/oss/src/storage/MemoryHistoryManager.ts","../../src/oss/src/storage/SupabaseHistoryManager.ts","../../src/oss/src/embeddings/google.ts","../../src/oss/src/llms/google.ts","../../src/oss/src/llms/azure.ts","../../src/oss/src/embeddings/azure.ts","../../src/oss/src/llms/langchain.ts","../../src/oss/src/prompts/index.ts","../../src/oss/src/embeddings/langchain.ts","../../src/oss/src/vector_stores/langchain.ts","../../src/oss/src/vector_stores/azure_ai_search.ts","../../src/oss/src/vector_stores/pgvector.ts","../../src/oss/src/utils/factory.ts","../../src/oss/src/storage/DummyHistoryManager.ts","../../src/oss/src/config/defaults.ts","../../src/oss/src/config/manager.ts","../../src/oss/src/utils/memory.ts","../../src/oss/src/utils/telemetry.ts","../../src/oss/src/utils/lemmatization.ts","../../src/oss/src/utils/entity_extraction.ts","../../src/oss/src/utils/scoring.ts","../../src/client/config.ts"],"sourcesContent":["import { v4 as uuidv4 } from \"uuid\";\nimport { createHash } from \"crypto\";\nimport {\n MemoryConfig,\n MemoryConfigSchema,\n MemoryItem,\n Message,\n SearchFilters,\n SearchResult,\n} from \"../types\";\nimport {\n EmbedderFactory,\n LLMFactory,\n VectorStoreFactory,\n HistoryManagerFactory,\n} from \"../utils/factory\";\nimport {\n FactRetrievalSchema,\n getFactRetrievalMessages,\n getUpdateMemoryMessages,\n parseMessages,\n extractJson,\n ADDITIVE_EXTRACTION_PROMPT,\n AGENT_CONTEXT_SUFFIX,\n AdditiveExtractionSchema,\n generateAdditiveExtractionPrompt,\n} from \"../prompts\";\nimport { DummyHistoryManager } from \"../storage/DummyHistoryManager\";\nimport { Embedder } from \"../embeddings/base\";\nimport { LLM } from \"../llms/base\";\nimport { VectorStore } from \"../vector_stores/base\";\nimport { ConfigManager } from \"../config/manager\";\n\nimport {\n AddMemoryOptions,\n SearchMemoryOptions,\n DeleteAllMemoryOptions,\n GetAllMemoryOptions,\n} from \"./memory.types\";\nimport { parse_vision_messages } from \"../utils/memory\";\nimport { HistoryManager } from \"../storage/base\";\nimport { captureClientEvent } from \"../utils/telemetry\";\nimport { lemmatizeForBm25 } from \"../utils/lemmatization\";\nimport {\n extractEntities,\n extractEntitiesBatch,\n} from \"../utils/entity_extraction\";\nimport {\n scoreAndRank,\n getBm25Params,\n normalizeBm25,\n ENTITY_BOOST_WEIGHT,\n ScoredResult,\n} from \"../utils/scoring\";\nimport { getDefaultVectorStoreDbPath } from \"../utils/sqlite\";\nimport { getOrCreateMem0UserId } from \"../../../client/config\";\n\n// Entity params that must be passed via filters - check both snake_case and camelCase\nconst ENTITY_PARAMS = [\n \"user_id\",\n \"agent_id\",\n \"run_id\",\n \"userId\",\n \"agentId\",\n \"runId\",\n];\n\n/**\n * Validates that no top-level entity parameters are passed in config.\n * @throws Error if entity params are found at top level\n */\nfunction rejectTopLevelEntityParams(\n config: Record<string, any>,\n methodName: string,\n): void {\n const invalidKeys = Object.keys(config).filter((k) =>\n ENTITY_PARAMS.includes(k),\n );\n if (invalidKeys.length > 0) {\n throw new Error(\n `Top-level entity parameters [${invalidKeys.join(\", \")}] are not supported in ${methodName}(). ` +\n `Use filters: { userId: \"...\" } instead.`,\n );\n }\n}\n\n/**\n * Validates and normalizes an entity ID.\n * - Trims leading/trailing whitespace\n * - Rejects empty or whitespace-only strings\n * - Rejects strings containing internal whitespace\n * @returns The trimmed entity ID, or undefined if input is undefined\n * @throws Error if entity ID is invalid\n */\nfunction validateAndTrimEntityId(\n value: string | undefined,\n name: string,\n): string | undefined {\n if (value === undefined) return undefined;\n const trimmed = value.trim();\n if (trimmed === \"\") {\n throw new Error(\n `Invalid ${name}: cannot be empty or whitespace-only. Provide a valid identifier.`,\n );\n }\n if (/\\s/.test(trimmed)) {\n throw new Error(\n `Invalid ${name}: cannot contain whitespace. Provide a valid identifier without spaces.`,\n );\n }\n return trimmed;\n}\n\n/**\n * Validates search parameters.\n * @throws Error if threshold or topK are invalid\n */\nfunction validateSearchParams(threshold?: number, topK?: number): void {\n if (threshold !== undefined) {\n if (typeof threshold !== \"number\" || isNaN(threshold)) {\n throw new Error(\"threshold must be a valid number\");\n }\n if (threshold < 0 || threshold > 1) {\n throw new Error(\n `Invalid threshold: ${threshold}. Must be between 0 and 1 (inclusive).`,\n );\n }\n }\n if (topK !== undefined) {\n if (typeof topK !== \"number\" || isNaN(topK) || !Number.isInteger(topK)) {\n throw new Error(\"topK must be a valid integer\");\n }\n if (topK < 0) {\n throw new Error(`Invalid topK: ${topK}. Must be a non-negative integer.`);\n }\n }\n}\n\nexport class Memory {\n private config: MemoryConfig;\n private customInstructions: string | undefined;\n private embedder: Embedder;\n private vectorStore!: VectorStore;\n private llm: LLM;\n private db: HistoryManager;\n private collectionName: string | undefined;\n private apiVersion: string;\n telemetryId: string;\n private _initPromise: Promise<void>;\n private _initError?: Error;\n private _entityStore?: VectorStore;\n\n constructor(config: Partial<MemoryConfig> = {}) {\n // Merge and validate config\n this.config = ConfigManager.mergeConfig(config);\n\n this.customInstructions = this.config.customInstructions;\n this.embedder = EmbedderFactory.create(\n this.config.embedder.provider,\n this.config.embedder.config,\n );\n // Vector store creation is deferred to _autoInitialize() so that\n // the embedding dimension can be auto-detected first when not\n // explicitly configured.\n this.llm = LLMFactory.create(\n this.config.llm.provider,\n this.config.llm.config,\n );\n if (this.config.disableHistory) {\n this.db = new DummyHistoryManager();\n } else {\n this.db = HistoryManagerFactory.create(\n this.config.historyStore!.provider,\n this.config.historyStore!,\n );\n }\n\n this.collectionName = this.config.vectorStore.config.collectionName;\n this.apiVersion = this.config.version || \"v1.0\";\n this.telemetryId = \"anonymous\";\n\n // Auto-detect embedding dimension (if needed), create vector store,\n // and initialize it. All public methods await this before proceeding.\n this._initPromise = this._autoInitialize().catch((error) => {\n this._initError =\n error instanceof Error ? error : new Error(String(error));\n console.error(this._initError);\n });\n }\n\n /**\n * If no explicit dimension was provided, runs a probe embedding to\n * detect it. Then creates and initializes the vector store.\n */\n private async _autoInitialize(): Promise<void> {\n if (!this.config.vectorStore.config.dimension) {\n try {\n const probe = await this.embedder.embed(\"dimension probe\");\n this.config.vectorStore.config.dimension = probe.length;\n } catch (error: any) {\n throw new Error(\n `Failed to auto-detect embedding dimension from provider '${this.config.embedder.provider}': ${error.message}. ` +\n `Please set 'dimension' in vectorStore.config or 'embeddingDims' in embedder.config explicitly.`,\n );\n }\n }\n\n this.vectorStore = VectorStoreFactory.create(\n this.config.vectorStore.provider,\n this.config.vectorStore.config,\n );\n\n // The vector store constructor may fire initialize() asynchronously\n // (e.g. Qdrant). Explicitly await it here to guarantee the backing\n // store (collections, tables, etc.) is ready before any public method\n // attempts to read or write.\n await this.vectorStore.initialize();\n\n await this._initializeTelemetry();\n }\n\n /**\n * Ensures that auto-initialization (dimension detection + vector store\n * creation) has completed before any public method proceeds.\n * If a previous init attempt failed, retries automatically.\n */\n private async _ensureInitialized(): Promise<void> {\n await this._initPromise;\n if (this._initError) {\n // Clear failed state and retry — the embedder or vector store\n // may have been transiently unavailable at startup.\n this._initError = undefined;\n this._initPromise = this._autoInitialize().catch((error) => {\n this._initError =\n error instanceof Error ? error : new Error(String(error));\n console.error(this._initError);\n });\n await this._initPromise;\n if (this._initError) {\n throw this._initError;\n }\n }\n }\n\n private async getEntityStore(): Promise<VectorStore> {\n if (!this._entityStore) {\n const entityCollectionName = `${this.collectionName}_entities`;\n const entityConfig = {\n ...this.config.vectorStore.config,\n collectionName: entityCollectionName,\n };\n // For file-based stores (memory/SQLite), always use a separate DB for entities\n if (this.config.vectorStore.provider === \"memory\") {\n const basePath = entityConfig.dbPath || getDefaultVectorStoreDbPath();\n entityConfig.dbPath = basePath.replace(/\\.db$/, \"_entities.db\");\n }\n this._entityStore = VectorStoreFactory.create(\n this.config.vectorStore.provider,\n entityConfig,\n );\n await this._entityStore.initialize();\n }\n return this._entityStore;\n }\n\n /**\n * Normalize a filters object for entity-store scoping: keeps only\n * user_id/agent_id/run_id keys whose values are defined.\n */\n private _sessionFiltersFromPayload(\n payload: Record<string, any>,\n ): Record<string, any> {\n const filters: Record<string, any> = {};\n if (payload.user_id) filters.user_id = payload.user_id;\n if (payload.agent_id) filters.agent_id = payload.agent_id;\n if (payload.run_id) filters.run_id = payload.run_id;\n return filters;\n }\n\n /**\n * Remove `memoryId` from every entity record scoped to `filters`.\n * If an entity's `linkedMemoryIds` becomes empty after removal, the\n * entity record itself is deleted. Errors on individual entities are\n * swallowed so one bad record does not break the whole operation.\n *\n * No-op if the entity store has not been initialized yet.\n */\n private async _removeMemoryFromEntityStore(\n memoryId: string,\n filters: Record<string, any>,\n ): Promise<void> {\n let entityStore: VectorStore;\n try {\n entityStore = await this.getEntityStore();\n } catch (e) {\n console.debug(`Entity store unavailable during cleanup: ${e}`);\n return;\n }\n\n let rows: Array<{ id: string; payload: Record<string, any> }> = [];\n try {\n const listed = await entityStore.list(filters, 10000);\n rows = (\n Array.isArray(listed) && Array.isArray(listed[0])\n ? listed[0]\n : (listed as any)\n ) as Array<{ id: string; payload: Record<string, any> }>;\n } catch (e) {\n console.debug(`Entity store list failed during cleanup: ${e}`);\n return;\n }\n\n for (const row of rows) {\n try {\n const payload = row.payload || {};\n const linked: string[] = Array.isArray(payload.linkedMemoryIds)\n ? payload.linkedMemoryIds\n : [];\n if (!linked.includes(memoryId)) continue;\n\n const remaining = linked.filter((id) => id !== memoryId);\n if (remaining.length === 0) {\n try {\n await entityStore.delete(row.id);\n } catch (e) {\n console.debug(`Entity delete failed for id=${row.id}: ${e}`);\n }\n } else {\n const newPayload = { ...payload, linkedMemoryIds: remaining };\n // entityStore.update requires a vector — re-embed entity text.\n const entityText =\n typeof payload.data === \"string\" ? payload.data : \"\";\n if (!entityText) {\n // Can't re-embed without text; skip gracefully.\n console.debug(\n `Entity id=${row.id} missing 'data'; skipping update during cleanup`,\n );\n continue;\n }\n let vec: number[];\n try {\n vec = await this.embedder.embed(entityText);\n } catch (e) {\n console.debug(`Entity re-embed failed for '${entityText}': ${e}`);\n continue;\n }\n try {\n await entityStore.update(row.id, vec, newPayload);\n } catch (e) {\n console.debug(`Entity update failed for id=${row.id}: ${e}`);\n }\n }\n } catch (e) {\n console.debug(`Entity cleanup error for id=${row?.id}: ${e}`);\n }\n }\n }\n\n /**\n * Extract entities from `text` and link them to `memoryId` in the\n * entity store, scoped to `filters` (user_id / agent_id / run_id).\n *\n * Simpler single-memory variant of Phase 7 in add(): no cross-memory\n * dedup, but still does per-entity \"search for existing, update if\n * match >= 0.95 else insert new\". Non-fatal errors are swallowed.\n */\n private async _linkEntitiesForMemory(\n memoryId: string,\n text: string,\n filters: Record<string, any>,\n ): Promise<void> {\n try {\n const entities = extractEntities(text);\n if (entities.length === 0) return;\n\n const entityStore = await this.getEntityStore();\n\n for (const entity of entities) {\n try {\n let entityVec: number[];\n try {\n entityVec = await this.embedder.embed(entity.text);\n } catch (e) {\n console.debug(`Entity embed failed for '${entity.text}': ${e}`);\n continue;\n }\n\n let matches: Array<{\n id: string;\n score?: number;\n payload: Record<string, any>;\n }> = [];\n try {\n matches = await entityStore.search(entityVec, 1, filters);\n } catch {}\n\n if (matches.length > 0 && (matches[0].score ?? 0) >= 0.95) {\n const match = matches[0];\n const payload = match.payload || {};\n const linked = new Set<string>(\n Array.isArray(payload.linkedMemoryIds)\n ? payload.linkedMemoryIds\n : [],\n );\n linked.add(memoryId);\n payload.linkedMemoryIds = Array.from(linked).sort();\n try {\n await entityStore.update(match.id, entityVec, payload);\n } catch (e) {\n console.debug(`Entity update failed for '${entity.text}': ${e}`);\n }\n } else {\n const entityPayload: Record<string, any> = {\n data: entity.text,\n entityType: entity.type,\n linkedMemoryIds: [memoryId],\n };\n if (filters.user_id) entityPayload.user_id = filters.user_id;\n if (filters.agent_id) entityPayload.agent_id = filters.agent_id;\n if (filters.run_id) entityPayload.run_id = filters.run_id;\n\n try {\n await entityStore.insert(\n [entityVec],\n [uuidv4()],\n [entityPayload],\n );\n } catch (e) {\n console.debug(`Entity insert failed for '${entity.text}': ${e}`);\n }\n }\n } catch (e) {\n console.debug(`Entity link error for '${entity.text}': ${e}`);\n }\n }\n } catch (e) {\n console.warn(`Entity linking failed during update: ${e}`);\n }\n }\n\n private buildSessionScope(filters: SearchFilters): string {\n const parts: string[] = [];\n for (const key of [\"agent_id\", \"run_id\", \"user_id\"].sort()) {\n const val = (filters as any)[key];\n if (val) parts.push(`${key}=${val}`);\n }\n return parts.join(\"&\");\n }\n\n private async _initializeTelemetry() {\n try {\n await this._getTelemetryId();\n\n // Capture initialization event\n await captureClientEvent(\"init\", this, {\n api_version: this.apiVersion,\n client_type: \"Memory\",\n collection_name: this.collectionName,\n });\n } catch (error) {}\n }\n\n private async _getTelemetryId() {\n try {\n if (\n !this.telemetryId ||\n this.telemetryId === \"anonymous\" ||\n this.telemetryId === \"anonymous-supabase\"\n ) {\n this.telemetryId =\n (await getOrCreateMem0UserId()) ||\n (await this.vectorStore.getUserId());\n try {\n await this.vectorStore.setUserId(this.telemetryId);\n } catch {}\n }\n return this.telemetryId;\n } catch (error) {\n this.telemetryId = \"anonymous\";\n return this.telemetryId;\n }\n }\n\n private async _captureEvent(methodName: string, additionalData = {}) {\n try {\n await this._getTelemetryId();\n await captureClientEvent(methodName, this, {\n ...additionalData,\n api_version: this.apiVersion,\n collection_name: this.collectionName,\n });\n } catch (error) {\n console.error(`Failed to capture ${methodName} event:`, error);\n }\n }\n\n static fromConfig(configDict: Record<string, any>): Memory {\n try {\n const config = MemoryConfigSchema.parse(configDict);\n return new Memory(config);\n } catch (e) {\n console.error(\"Configuration validation error:\", e);\n throw e;\n }\n }\n\n async add(\n messages: string | Message[],\n config: AddMemoryOptions,\n ): Promise<SearchResult> {\n // Validate messages input\n if (messages === undefined || messages === null) {\n throw new Error(\n \"messages is required and cannot be undefined or null. Provide a string or array of messages.\",\n );\n }\n\n await this._ensureInitialized();\n await this._captureEvent(\"add\", {\n message_count: Array.isArray(messages) ? messages.length : 1,\n has_metadata: !!config.metadata,\n has_filters: !!config.filters,\n infer: config.infer,\n });\n const { metadata = {}, filters = {}, infer = true } = config;\n\n // Validate and trim entity IDs\n const userId = validateAndTrimEntityId(config.userId, \"userId\");\n const agentId = validateAndTrimEntityId(config.agentId, \"agentId\");\n const runId = validateAndTrimEntityId(config.runId, \"runId\");\n\n // Convert camelCase entity params to snake_case for storage (matches API and search/getAll filters)\n if (userId) filters.user_id = metadata.user_id = userId;\n if (agentId) filters.agent_id = metadata.agent_id = agentId;\n if (runId) filters.run_id = metadata.run_id = runId;\n\n if (!filters.user_id && !filters.agent_id && !filters.run_id) {\n throw new Error(\n \"One of the filters: userId, agentId or runId is required!\",\n );\n }\n\n const parsedMessages = Array.isArray(messages)\n ? (messages as Message[])\n : [{ role: \"user\", content: messages }];\n\n const final_parsedMessages = await parse_vision_messages(parsedMessages);\n\n // Add to vector store\n const vectorStoreResult = await this.addToVectorStore(\n final_parsedMessages,\n metadata,\n filters,\n infer,\n );\n\n return {\n results: vectorStoreResult,\n };\n }\n\n private async addToVectorStore(\n messages: Message[],\n metadata: Record<string, any>,\n filters: SearchFilters,\n infer: boolean,\n ): Promise<MemoryItem[]> {\n if (!infer) {\n const returnedMemories: MemoryItem[] = [];\n for (const message of messages) {\n if (message.content === \"system\") {\n continue;\n }\n const memoryId = await this.createMemory(\n message.content as string,\n {},\n metadata,\n );\n returnedMemories.push({\n id: memoryId,\n memory: message.content as string,\n metadata: { event: \"ADD\" },\n });\n }\n return returnedMemories;\n }\n\n // === V3 PHASED BATCH PIPELINE ===\n\n // Phase 0: Context gathering\n const sessionScope = this.buildSessionScope(filters);\n let lastMessages: Array<{\n role: string;\n content: string;\n name?: string;\n }> = [];\n if (typeof this.db.getLastMessages === \"function\") {\n try {\n lastMessages = await this.db.getLastMessages(sessionScope, 10);\n } catch {\n // getLastMessages not supported — proceed without context\n }\n }\n const parsedMessages = messages.map((m) => m.content).join(\"\\n\");\n\n // Phase 1: Existing memory retrieval\n const queryEmbedding = await this.embedder.embed(parsedMessages);\n const existingResults = await this.vectorStore.search(\n queryEmbedding,\n 10,\n filters,\n );\n\n // Map UUIDs to integers (anti-hallucination)\n const existingMemories: Array<{ id: string; text: string }> = [];\n const uuidMapping: Record<string, string> = {};\n for (let idx = 0; idx < existingResults.length; idx++) {\n const mem = existingResults[idx];\n uuidMapping[String(idx)] = mem.id;\n existingMemories.push({\n id: String(idx),\n text: mem.payload?.data ?? \"\",\n });\n }\n\n // Phase 2: LLM extraction (single call)\n const isAgentScoped = !!filters.agent_id && !filters.user_id;\n let systemPrompt = ADDITIVE_EXTRACTION_PROMPT;\n if (isAgentScoped) {\n systemPrompt += AGENT_CONTEXT_SUFFIX;\n }\n\n const userPrompt = generateAdditiveExtractionPrompt({\n existingMemories,\n newMessages: parsedMessages,\n lastKMessages: lastMessages,\n customInstructions: this.customInstructions,\n });\n\n let response: string;\n try {\n response = (await this.llm.generateResponse(\n [\n { role: \"system\", content: systemPrompt },\n { role: \"user\", content: userPrompt },\n ],\n { type: \"json_object\" },\n )) as string;\n } catch (e) {\n console.error(\"LLM extraction failed:\", e);\n return [];\n }\n\n // Parse response\n let extractedMemories: Array<{\n id?: string;\n text?: string;\n attributed_to?: string;\n linked_memory_ids?: string[];\n }> = [];\n try {\n const cleanResponse = extractJson(response);\n if (cleanResponse && cleanResponse.trim()) {\n try {\n const parsed = AdditiveExtractionSchema.parse(\n JSON.parse(cleanResponse),\n );\n extractedMemories = parsed.memory;\n } catch {\n const fallbackJson = extractJson(cleanResponse);\n extractedMemories = JSON.parse(fallbackJson)?.memory ?? [];\n }\n }\n } catch (e) {\n console.error(\"Error parsing extraction response:\", e);\n extractedMemories = [];\n }\n\n if (extractedMemories.length === 0) {\n // Save messages even if nothing extracted\n if (typeof this.db.saveMessages === \"function\") {\n try {\n await this.db.saveMessages(\n messages.map((m) => ({\n role: m.role,\n content: m.content as string,\n })),\n sessionScope,\n );\n } catch {}\n }\n return [];\n }\n\n // Phase 3: Batch embed all extracted memory texts\n const memTexts = extractedMemories\n .map((m) => m.text ?? \"\")\n .filter((t) => t.length > 0);\n let embedMap: Record<string, number[]> = {};\n try {\n const memEmbeddingsList = await this.embedder.embedBatch(memTexts);\n for (let i = 0; i < memTexts.length; i++) {\n embedMap[memTexts[i]] = memEmbeddingsList[i];\n }\n } catch {\n // Fallback: embed individually\n for (const text of memTexts) {\n try {\n embedMap[text] = await this.embedder.embed(text);\n } catch (e) {\n console.warn(`Failed to embed memory text: ${e}`);\n }\n }\n }\n\n // Phase 4-5: CPU processing + hash dedup\n const existingHashes = new Set<string>();\n for (const mem of existingResults) {\n const h = mem.payload?.hash;\n if (h) existingHashes.add(h);\n }\n\n const records: Array<{\n memoryId: string;\n text: string;\n embedding: number[];\n payload: Record<string, any>;\n }> = [];\n const seenHashes = new Set<string>();\n\n for (const mem of extractedMemories) {\n const text = mem.text;\n if (!text || !(text in embedMap)) continue;\n\n const memHash = createHash(\"md5\").update(text).digest(\"hex\");\n if (existingHashes.has(memHash) || seenHashes.has(memHash)) {\n continue;\n }\n seenHashes.add(memHash);\n\n const textLemmatized = lemmatizeForBm25(text);\n const memoryId = uuidv4();\n const now = new Date().toISOString();\n\n const memPayload: Record<string, any> = {\n ...metadata,\n data: text,\n textLemmatized,\n hash: memHash,\n createdAt: now,\n updatedAt: now,\n };\n if (mem.attributed_to) {\n memPayload.attributedTo = mem.attributed_to;\n }\n if (filters.user_id) memPayload.user_id = filters.user_id;\n if (filters.agent_id) memPayload.agent_id = filters.agent_id;\n if (filters.run_id) memPayload.run_id = filters.run_id;\n\n records.push({\n memoryId,\n text,\n embedding: embedMap[text],\n payload: memPayload,\n });\n }\n\n if (records.length === 0) {\n if (typeof this.db.saveMessages === \"function\") {\n try {\n await this.db.saveMessages(\n messages.map((m) => ({\n role: m.role,\n content: m.content as string,\n })),\n sessionScope,\n );\n } catch {}\n }\n return [];\n }\n\n // Phase 6: Batch persist\n const allVectors = records.map((r) => r.embedding);\n const allIds = records.map((r) => r.memoryId);\n const allPayloads = records.map((r) => r.payload);\n\n try {\n await this.vectorStore.insert(allVectors, allIds, allPayloads);\n } catch {\n // Fallback: insert one by one\n for (let i = 0; i < allIds.length; i++) {\n try {\n await this.vectorStore.insert(\n [allVectors[i]],\n [allIds[i]],\n [allPayloads[i]],\n );\n } catch (e) {\n console.error(`Failed to insert memory ${allIds[i]}: ${e}`);\n }\n }\n }\n\n // Batch history\n const historyRecords = records.map((r) => ({\n memoryId: r.memoryId,\n previousValue: null as string | null,\n newValue: r.text as string | null,\n action: \"ADD\",\n createdAt: r.payload.createdAt as string | undefined,\n updatedAt: undefined as string | undefined,\n isDeleted: 0,\n }));\n\n if (typeof this.db.batchAddHistory === \"function\") {\n try {\n await this.db.batchAddHistory(historyRecords);\n } catch {\n // Fallback: add one by one\n for (const hr of historyRecords) {\n try {\n await this.db.addHistory(\n hr.memoryId,\n null,\n hr.newValue,\n \"ADD\",\n hr.createdAt,\n );\n } catch (e) {\n console.error(`Failed to add history for ${hr.memoryId}: ${e}`);\n }\n }\n }\n } else {\n for (const hr of historyRecords) {\n try {\n await this.db.addHistory(\n hr.memoryId,\n null,\n hr.newValue,\n \"ADD\",\n hr.createdAt,\n );\n } catch (e) {\n console.error(`Failed to add history for ${hr.memoryId}: ${e}`);\n }\n }\n }\n\n // Phase 7: Batch entity linking\n try {\n const allTexts = records.map((r) => r.text);\n const allEntities = extractEntitiesBatch(allTexts);\n\n // 7a: Global dedup — collect unique entities across all memories\n const globalEntities: Record<\n string,\n { entityType: string; entityText: string; memoryIds: Set<string> }\n > = {};\n for (let idx = 0; idx < records.length; idx++) {\n const memoryId = records[idx].memoryId;\n const entities = idx < allEntities.length ? allEntities[idx] : [];\n for (const entity of entities) {\n const key = entity.text.trim().toLowerCase();\n if (key in globalEntities) {\n globalEntities[key].memoryIds.add(memoryId);\n } else {\n globalEntities[key] = {\n entityType: entity.type,\n entityText: entity.text,\n memoryIds: new Set([memoryId]),\n };\n }\n }\n }\n\n const orderedKeys = Object.keys(globalEntities);\n if (orderedKeys.length > 0) {\n const entityTexts = orderedKeys.map(\n (k) => globalEntities[k].entityText,\n );\n\n // 7b: Single batch embed for all unique entities\n let entityEmbeddings: (number[] | null)[];\n try {\n entityEmbeddings = await this.embedder.embedBatch(entityTexts);\n } catch {\n // Fallback: embed individually\n entityEmbeddings = [];\n for (const t of entityTexts) {\n try {\n entityEmbeddings.push(await this.embedder.embed(t));\n } catch {\n entityEmbeddings.push(null);\n }\n }\n }\n\n // Filter out entities with failed embeddings\n const valid: Array<{ index: number; key: string }> = [];\n for (let i = 0; i < orderedKeys.length; i++) {\n if (entityEmbeddings[i] !== null) {\n valid.push({ index: i, key: orderedKeys[i] });\n }\n }\n\n if (valid.length > 0) {\n const entityStore = await this.getEntityStore();\n\n // 7c: Search for existing entities one by one (no batch search)\n const toInsertVectors: number[][] = [];\n const toInsertIds: string[] = [];\n const toInsertPayloads: Record<string, any>[] = [];\n\n for (const { index: j, key } of valid) {\n const { entityType, entityText, memoryIds } = globalEntities[key];\n const entityVec = entityEmbeddings[j]!;\n\n let matches: Array<{\n id: string;\n score?: number;\n payload: Record<string, any>;\n }> = [];\n try {\n matches = await entityStore.search(entityVec, 1, filters);\n } catch {}\n\n if (matches.length > 0 && (matches[0].score ?? 0) >= 0.95) {\n // Update existing entity\n const match = matches[0];\n const payload = match.payload || {};\n const linked = new Set<string>(payload.linkedMemoryIds ?? []);\n for (const mid of memoryIds) linked.add(mid);\n payload.linkedMemoryIds = Array.from(linked).sort();\n try {\n await entityStore.update(match.id, entityVec, payload);\n } catch (e) {\n console.debug(`Entity update failed for '${entityText}': ${e}`);\n }\n } else {\n // New entity — collect for batch insert\n const entityPayload: Record<string, any> = {\n data: entityText,\n entityType,\n linkedMemoryIds: Array.from(memoryIds).sort(),\n };\n if (filters.user_id) entityPayload.user_id = filters.user_id;\n if (filters.agent_id) entityPayload.agent_id = filters.agent_id;\n if (filters.run_id) entityPayload.run_id = filters.run_id;\n\n toInsertVectors.push(entityVec);\n toInsertIds.push(uuidv4());\n toInsertPayloads.push(entityPayload);\n }\n }\n\n // 7e: Single batch insert for all new entities\n if (toInsertVectors.length > 0) {\n try {\n await entityStore.insert(\n toInsertVectors,\n toInsertIds,\n toInsertPayloads,\n );\n } catch (e) {\n console.warn(`Batch entity insert failed: ${e}`);\n }\n }\n }\n }\n } catch (e) {\n console.warn(`Batch entity linking failed: ${e}`);\n }\n\n // Phase 8: Save messages + return\n if (typeof this.db.saveMessages === \"function\") {\n try {\n await this.db.saveMessages(\n messages.map((m) => ({\n role: m.role,\n content: m.content as string,\n })),\n sessionScope,\n );\n } catch {}\n }\n\n return records.map((r) => ({\n id: r.memoryId,\n memory: r.text,\n metadata: { event: \"ADD\" },\n }));\n }\n\n async get(memoryId: string): Promise<MemoryItem | null> {\n await this._ensureInitialized();\n const memory = await this.vectorStore.get(memoryId);\n if (!memory) return null;\n\n const filters = {\n ...(memory.payload.user_id && { user_id: memory.payload.user_id }),\n ...(memory.payload.agent_id && { agent_id: memory.payload.agent_id }),\n ...(memory.payload.run_id && { run_id: memory.payload.run_id }),\n };\n\n const memoryItem: MemoryItem = {\n id: memory.id,\n memory: memory.payload.data,\n hash: memory.payload.hash,\n createdAt: memory.payload.createdAt,\n updatedAt: memory.payload.updatedAt,\n metadata: {},\n };\n\n // Add additional metadata\n const excludedKeys = new Set([\n \"userId\",\n \"agentId\",\n \"runId\",\n \"hash\",\n \"data\",\n \"createdAt\",\n \"updatedAt\",\n \"textLemmatized\",\n \"attributedTo\",\n ]);\n for (const [key, value] of Object.entries(memory.payload)) {\n if (!excludedKeys.has(key)) {\n memoryItem.metadata![key] = value;\n }\n }\n\n return { ...memoryItem, ...filters };\n }\n\n async search(\n query: string,\n config: SearchMemoryOptions,\n ): Promise<SearchResult> {\n // Reject top-level entity params - must use filters instead\n rejectTopLevelEntityParams(config as Record<string, any>, \"search\");\n\n // Validate search parameters (before applying defaults)\n validateSearchParams(config.threshold, config.topK);\n\n // Validate and trim entity IDs in filters. Only include keys whose\n // validated value is defined — otherwise downstream vector stores\n // receive `agent_id: undefined` / `run_id: undefined` and fail\n // (Qdrant rejects the malformed match, pgvector binds NULL, Redis\n // emits a literal \"undefined\" string in TAG filters).\n const normalizedFilters: Record<string, any> = config.filters\n ? Object.fromEntries(\n Object.entries({\n ...config.filters,\n user_id: validateAndTrimEntityId(config.filters.user_id, \"user_id\"),\n agent_id: validateAndTrimEntityId(\n config.filters.agent_id,\n \"agent_id\",\n ),\n run_id: validateAndTrimEntityId(config.filters.run_id, \"run_id\"),\n }).filter(([, v]) => v !== undefined),\n )\n : {};\n\n await this._ensureInitialized();\n const { topK = 20, threshold = 0.1 } = config;\n\n await this._captureEvent(\"search\", {\n query_length: query.length,\n topK,\n has_filters: !!config.filters,\n });\n\n let effectiveFilters: Record<string, any> = { ...normalizedFilters };\n\n // Apply enhanced metadata filtering if advanced operators are detected\n if (this._hasAdvancedOperators(effectiveFilters)) {\n const processedFilters = this._processMetadataFilters(effectiveFilters);\n // Remove logical/operator keys that have been reprocessed\n for (const logicalKey of [\"AND\", \"OR\", \"NOT\"]) {\n delete effectiveFilters[logicalKey];\n }\n for (const fk of Object.keys(effectiveFilters)) {\n if (\n ![\"AND\", \"OR\", \"NOT\", \"user_id\", \"agent_id\", \"run_id\"].includes(fk) &&\n typeof effectiveFilters[fk] === \"object\" &&\n effectiveFilters[fk] !== null\n ) {\n delete effectiveFilters[fk];\n }\n }\n effectiveFilters = { ...effectiveFilters, ...processedFilters };\n }\n\n // Validate filters contains at least one entity ID (snake_case)\n if (\n !effectiveFilters.user_id &&\n !effectiveFilters.agent_id &&\n !effectiveFilters.run_id\n ) {\n throw new Error(\n \"filters must contain at least one of: user_id, agent_id, run_id. \" +\n \"Example: filters: { user_id: 'u1' }\",\n );\n }\n\n // Step 1: Preprocess query\n const queryLemmatized = lemmatizeForBm25(query);\n const queryEntities = extractEntities(query);\n\n // Step 2: Embed query\n const queryEmbedding = await this.embedder.embed(query);\n\n // Step 3: Semantic search (over-fetch for scoring pool)\n const internalLimit = Math.max(topK * 4, 60);\n const semanticResults = await this.vectorStore.search(\n queryEmbedding,\n internalLimit,\n effectiveFilters,\n );\n\n // Step 4: Keyword search (if store supports it)\n let keywordResults: Array<{\n id: string;\n score?: number;\n payload: Record<string, any>;\n }> | null = null;\n if (typeof this.vectorStore.keywordSearch === \"function\") {\n try {\n keywordResults =\n (await this.vectorStore.keywordSearch(\n queryLemmatized,\n internalLimit,\n effectiveFilters,\n )) ?? null;\n } catch {\n keywordResults = null;\n }\n }\n\n // Step 5: Compute BM25 scores from keyword results\n const bm25Scores: Record<string, number> = {};\n if (keywordResults) {\n const [midpoint, steepness] = getBm25Params(query, queryLemmatized);\n for (const mem of keywordResults) {\n const memId = String(mem.id);\n const rawScore = mem.score ?? 0;\n if (rawScore > 0) {\n bm25Scores[memId] = normalizeBm25(rawScore, midpoint, steepness);\n }\n }\n }\n\n // Step 6: Compute entity boosts\n const entityBoosts: Record<string, number> = {};\n if (queryEntities.length > 0) {\n try {\n // Deduplicate entities (max 8)\n const seen = new Set<string>();\n const deduped: Array<{ type: string; text: string }> = [];\n for (const entity of queryEntities.slice(0, 8)) {\n const key = entity.text.trim().toLowerCase();\n if (key && !seen.has(key)) {\n seen.add(key);\n deduped.push(entity);\n }\n }\n\n if (deduped.length > 0) {\n const entityStore = await this.getEntityStore();\n\n for (const entity of deduped) {\n try {\n const entityEmbedding = await this.embedder.embed(entity.text);\n const matches = await entityStore.search(\n entityEmbedding,\n 500,\n effectiveFilters,\n );\n\n for (const match of matches) {\n const similarity = match.score ?? 0;\n if (similarity < 0.5) continue;\n\n const payload = match.payload || {};\n const linkedMemoryIds = payload.linkedMemoryIds ?? [];\n if (!Array.isArray(linkedMemoryIds)) continue;\n\n // Spread-attenuated boost\n const numLinked = Math.max(linkedMemoryIds.length, 1);\n const memoryCountWeight =\n 1.0 / (1.0 + 0.001 * (numLinked - 1) ** 2);\n const boost =\n similarity * ENTITY_BOOST_WEIGHT * memoryCountWeight;\n\n for (const memoryId of linkedMemoryIds) {\n if (memoryId) {\n const memKey = String(memoryId);\n entityBoosts[memKey] = Math.max(\n entityBoosts[memKey] ?? 0,\n boost,\n );\n }\n }\n }\n } catch (e) {\n // Individual entity boost failed — continue\n }\n }\n }\n } catch (e) {\n console.warn(\"Entity boost computation failed:\", e);\n }\n }\n\n // Step 7: Build candidate set from semantic results\n const candidates = semanticResults.map((mem) => ({\n id: String(mem.id),\n score: mem.score ?? 0,\n payload: mem.payload || {},\n }));\n\n // Step 8: Score and rank\n const scoredResults = scoreAndRank(\n candidates,\n bm25Scores,\n entityBoosts,\n threshold ?? 0.1,\n topK,\n );\n\n // Step 9: Format results\n const excludedKeys = new Set([\n \"user_id\",\n \"agent_id\",\n \"run_id\",\n \"hash\",\n \"data\",\n \"createdAt\",\n \"updatedAt\",\n \"textLemmatized\",\n \"attributedTo\",\n ]);\n\n const results = scoredResults\n .filter((scored) => scored.payload?.data)\n .map((scored) => {\n const payload = scored.payload || {};\n return {\n id: scored.id,\n memory: payload.data,\n hash: payload.hash,\n createdAt: payload.createdAt,\n updatedAt: payload.updatedAt,\n score: scored.score,\n metadata: Object.entries(payload)\n .filter(([key]) => !excludedKeys.has(key))\n .reduce((acc, [key, value]) => ({ ...acc, [key]: value }), {}),\n ...(payload.user_id && { user_id: payload.user_id }),\n ...(payload.agent_id && { agent_id: payload.agent_id }),\n ...(payload.run_id && { run_id: payload.run_id }),\n };\n });\n\n return {\n results,\n };\n }\n\n async update(memoryId: string, data: string): Promise<{ message: string }> {\n await this._ensureInitialized();\n await this._captureEvent(\"update\", { memory_id: memoryId });\n const embedding = await this.embedder.embed(data);\n await this.updateMemory(memoryId, data, { [data]: embedding });\n return { message: \"Memory updated successfully!\" };\n }\n\n async delete(memoryId: string): Promise<{ message: string }> {\n await this._ensureInitialized();\n await this._captureEvent(\"delete\", { memory_id: memoryId });\n await this.deleteMemory(memoryId);\n return { message: \"Memory deleted successfully!\" };\n }\n\n async deleteAll(\n config: DeleteAllMemoryOptions,\n ): Promise<{ message: string }> {\n await this._ensureInitialized();\n await this._captureEvent(\"delete_all\", {\n has_user_id: !!config.userId,\n has_agent_id: !!config.agentId,\n has_run_id: !!config.runId,\n });\n const { userId, agentId, runId } = config;\n\n // Convert camelCase entity params to snake_case for filters (matches storage and search/getAll)\n const filters: SearchFilters = {};\n if (userId) filters.user_id = userId;\n if (agentId) filters.agent_id = agentId;\n if (runId) filters.run_id = runId;\n\n if (!Object.keys(filters).length) {\n throw new Error(\n \"At least one filter is required to delete all memories. If you want to delete all memories, use the `reset()` method.\",\n );\n }\n\n const [memories] = await this.vectorStore.list(filters);\n for (const memory of memories) {\n await this.deleteMemory(memory.id);\n }\n\n return { message: \"Memories deleted successfully!\" };\n }\n\n async history(memoryId: string): Promise<any[]> {\n await this._ensureInitialized();\n return this.db.getHistory(memoryId);\n }\n\n async reset(): Promise<void> {\n await this._ensureInitialized();\n await this._captureEvent(\"reset\");\n await this.db.reset();\n\n // Check provider before attempting deleteCol\n if (this.config.vectorStore.provider.toLowerCase() !== \"langchain\") {\n try {\n await this.vectorStore.deleteCol();\n } catch (e) {\n console.error(\n `Failed to delete collection for provider '${this.config.vectorStore.provider}':`,\n e,\n );\n // Decide if you want to re-throw or just log\n }\n } else {\n console.warn(\n \"Memory.reset(): Skipping vector store collection deletion as 'langchain' provider is used. Underlying Langchain vector store data is not cleared by this operation.\",\n );\n }\n\n if (this._entityStore) {\n try {\n await this._entityStore.deleteCol();\n } catch {}\n this._entityStore = undefined;\n }\n\n // Re-initialize factories/clients based on the original config.\n // Dimension is already set in this.config from the initial probe,\n // so _autoInitialize will skip the probe and just re-create the store.\n this.embedder = EmbedderFactory.create(\n this.config.embedder.provider,\n this.config.embedder.config,\n );\n this.llm = LLMFactory.create(\n this.config.llm.provider,\n this.config.llm.config,\n );\n\n // Re-create vector store via _autoInitialize (which handles dimension + creation)\n this._initError = undefined;\n this._initPromise = this._autoInitialize().catch((error) => {\n this._initError =\n error instanceof Error ? error : new Error(String(error));\n console.error(this._initError);\n });\n await this._initPromise;\n }\n\n async getAll(config: GetAllMemoryOptions): Promise<SearchResult> {\n // Reject top-level entity params - must use filters instead\n rejectTopLevelEntityParams(config as Record<string, any>, \"getAll\");\n\n // Validate topK if provided (before applying defaults)\n validateSearchParams(undefined, config.topK);\n\n await this._ensureInitialized();\n\n const { topK = 20 } = config;\n\n // Validate and trim entity IDs in filters. Drop keys that resolve to\n // undefined so downstream vector stores don't receive\n // `agent_id: undefined` / `run_id: undefined` and fail.\n const filters: Record<string, any> = Object.fromEntries(\n Object.entries({\n ...(config.filters || {}),\n user_id: validateAndTrimEntityId(config.filters?.user_id, \"user_id\"),\n agent_id: validateAndTrimEntityId(config.filters?.agent_id, \"agent_id\"),\n run_id: validateAndTrimEntityId(config.filters?.run_id, \"run_id\"),\n }).filter(([, v]) => v !== undefined),\n );\n\n await this._captureEvent(\"get_all\", {\n topK,\n has_user_id: !!filters.user_id,\n has_agent_id: !!filters.agent_id,\n has_run_id: !!filters.run_id,\n });\n\n // Validate filters contains at least one entity ID (snake_case)\n if (!filters.user_id && !filters.agent_id && !filters.run_id) {\n throw new Error(\n \"filters must contain at least one of: user_id, agent_id, run_id. \" +\n \"Example: filters: { user_id: 'u1' }\",\n );\n }\n\n const [memories] = await this.vectorStore.list(filters, topK);\n\n const excludedKeys = new Set([\n \"user_id\",\n \"agent_id\",\n \"run_id\",\n \"hash\",\n \"data\",\n \"createdAt\",\n \"updatedAt\",\n \"textLemmatized\",\n \"attributedTo\",\n ]);\n const results = memories.map((mem) => ({\n id: mem.id,\n memory: mem.payload.data,\n hash: mem.payload.hash,\n createdAt: mem.payload.createdAt,\n updatedAt: mem.payload.updatedAt,\n metadata: Object.entries(mem.payload)\n .filter(([key]) => !excludedKeys.has(key))\n .reduce((acc, [key, value]) => ({ ...acc, [key]: value }), {}),\n ...(mem.payload.user_id && { user_id: mem.payload.user_id }),\n ...(mem.payload.agent_id && { agent_id: mem.payload.agent_id }),\n ...(mem.payload.run_id && { run_id: mem.payload.run_id }),\n }));\n\n return { results };\n }\n\n private async createMemory(\n data: string,\n existingEmbeddings: Record<string, number[]>,\n metadata: Record<string, any>,\n ): Promise<string> {\n const memoryId = uuidv4();\n const embedding =\n existingEmbeddings[data] || (await this.embedder.embed(data));\n\n const memoryMetadata = {\n ...metadata,\n data,\n hash: createHash(\"md5\").update(data).digest(\"hex\"),\n textLemmatized: lemmatizeForBm25(data),\n createdAt: new Date().toISOString(),\n };\n\n await this.vectorStore.insert([embedding], [memoryId], [memoryMetadata]);\n await this.db.addHistory(\n memoryId,\n null,\n data,\n