@dooor-ai/toolkit

import { Eval } from "./base"; import { EvalResult, EvalConfig } from "../core/types"; import { getCortexDBClient, getGlobalProviderName } from "../observability/cortexdb-client"; export interface KnowledgeRetentionConfig extends EvalConfig { /** Conversation history (previous messages) */ conversationHistory?: Array<{ role: string; content: string }>; } /** * KnowledgeRetentionEval - Measures if LLM remembers previous conversation context * * Evaluates whether the response demonstrates awareness of earlier messages, * important for multi-turn conversations and chatbots. * * Example: * ```typescript * const eval = new KnowledgeRetentionEval({ * threshold: 0.8, * conversationHistory: [ * { role: "user", content: "My name is Alice." }, * { role: "assistant", content: "Nice to meet you, Alice!" } * ] * }); * const result = await eval.evaluate( * "What's my name?", * "Your name is Alice." * ); * // result.score = 1.0, result.passed = true * ``` */ export class KnowledgeRetentionEval extends Eval { private conversationHistory?: Array<{ role: string; content: string }>; constructor(config: KnowledgeRetentionConfig = {}) { super(config); this.conversationHistory = config.conversationHistory; } get name(): string { return "KnowledgeRetentionEval"; } /** * Set conversation history dynamically */ setConversationHistory(history: Array<{ role: string; content: string }>): void { this.conversationHistory = history; } async evaluate( input: string, output: string, metadata?: Record<string, any> ): Promise<EvalResult> { const startTime = Date.now(); const history = this.conversationHistory || metadata?.conversationHistory || metadata?.history; if (!history || history.length === 0) { return { name: this.name, score: 0.5, passed: false, details: "No conversation history provided. Pass 'conversationHistory' via config or metadata.", metadata: { latency: Date.now() - startTime, }, timestamp: new Date(), }; } try { const cortexClient = getCortexDBClient(); const providerName = getGlobalProviderName(); const prompt = this.buildPrompt(history, input, output); const response = await cortexClient.invokeAI({ prompt, usage: "evaluation", providerName: providerName || undefined, temperature: 0.0, maxTokens: 300, }); const score = this.parseScore(response.text); const passed = score >= this.getThreshold(); return { name: this.name, score, passed, details: `Knowledge retention score: ${score.toFixed(2)}. ${passed ? "PASSED" : "FAILED"} (threshold: ${this.getThreshold()})`, metadata: { latency: Date.now() - startTime, judgeResponse: response.text, historyLength: history.length, }, timestamp: new Date(), }; } catch (error) { console.error("KnowledgeRetentionEval failed:", error); return { name: this.name, score: 0.5, passed: false, details: `Eval failed: ${error instanceof Error ? error.message : "Unknown error"}`, metadata: { error: String(error), latency: Date.now() - startTime, }, timestamp: new Date(), }; } } private buildPrompt( history: Array<{ role: string; content: string }>, currentInput: string, currentOutput: string ): string { const historyText = history .map((msg) => `${msg.role.toUpperCase()}: ${msg.content}`) .join("\n"); return `You are an expert evaluator. Your task is to assess if the response demonstrates KNOWLEDGE RETENTION from previous conversation. Conversation History: ${historyText} Current Turn: USER: ${currentInput} ASSISTANT: ${currentOutput} Evaluate knowledge retention: - 1.0 = Perfect retention, references earlier context appropriately - 0.7-0.9 = Good retention, mostly aware of history - 0.4-0.6 = Partial retention, misses some important context - 0.0-0.3 = Poor retention, ignores or contradicts earlier information Output ONLY a JSON object in this exact format: { "score": 0.9, "reasoning": "Assessment of how well the response uses conversation history" }`; } private parseScore(response: string): number { try { const jsonMatch = response.match(/\{[\s\S]*\}/); if (jsonMatch) { const parsed = JSON.parse(jsonMatch[0]); if (typeof parsed.score === "number") { return Math.max(0, Math.min(1, parsed.score)); } } const numberMatch = response.match(/\b0?\.\d+\b|\b1\.0\b|\b[01]\b/); if (numberMatch) { return Math.max(0, Math.min(1, parseFloat(numberMatch[0]))); } console.warn("Could not parse score from response:", response); return 0.5; } catch (error) { console.error("Error parsing score:", error); return 0.5; } } }