UNPKG

@dooor-ai/toolkit

Version:

Guards, Evals & Observability for AI applications - works seamlessly with LangChain/LangGraph

152 lines (146 loc) 5.4 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.ContextualRecallEval = void 0; const base_1 = require("./base"); const cortexdb_client_1 = require("../observability/cortexdb-client"); /** * ContextualRecallEval - Measures if the retrieval system found all relevant information * * Evaluates whether the retrieved context contains all the information needed to * answer the question correctly. High recall means nothing important was missed. * * Example: * ```typescript * const eval = new ContextualRecallEval({ * threshold: 0.8, * expectedAnswer: "Paris is the capital and was founded in 3rd century BC.", * context: "Paris is the capital of France." * }); * const result = await eval.evaluate( * "What is the capital of France and when was it founded?", * "The capital is Paris." * ); * // result.score = 0.5 (missed founding date info), result.passed = false * ``` */ class ContextualRecallEval extends base_1.Eval { constructor(config = {}) { super(config); this.expectedAnswer = config.expectedAnswer; this.context = config.context; } get name() { return "ContextualRecallEval"; } /** * Set expected answer dynamically */ setExpectedAnswer(answer) { this.expectedAnswer = answer; } /** * Set context dynamically */ setContext(context) { this.context = context; } async evaluate(input, output, metadata) { const startTime = Date.now(); const context = this.context || metadata?.context || metadata?.retrievedDocs; const expected = this.expectedAnswer || metadata?.expectedAnswer || metadata?.groundTruth; if (!context || !expected) { return { name: this.name, score: 0.5, passed: false, details: "Missing required data for contextual recall. Provide 'context' and 'expectedAnswer' via config or metadata.", metadata: { latency: Date.now() - startTime, }, timestamp: new Date(), }; } try { const cortexClient = (0, cortexdb_client_1.getCortexDBClient)(); const providerName = (0, cortexdb_client_1.getGlobalProviderName)(); const prompt = this.buildPrompt(input, expected, context); const response = await cortexClient.invokeAI({ prompt, usage: "evaluation", providerName: providerName || undefined, temperature: 0.0, maxTokens: 300, }); const score = this.parseScore(response.text); const passed = score >= this.getThreshold(); return { name: this.name, score, passed, details: `Contextual recall score: ${score.toFixed(2)}. ${passed ? "PASSED" : "FAILED"} (threshold: ${this.getThreshold()})`, metadata: { latency: Date.now() - startTime, judgeResponse: response.text, contextLength: context.length, }, timestamp: new Date(), }; } catch (error) { console.error("ContextualRecallEval failed:", error); return { name: this.name, score: 0.5, passed: false, details: `Eval failed: ${error instanceof Error ? error.message : "Unknown error"}`, metadata: { error: String(error), latency: Date.now() - startTime, }, timestamp: new Date(), }; } } buildPrompt(question, expectedAnswer, context) { return `You are an expert evaluator. Your task is to measure RECALL: did the retrieved context contain all the information needed to produce the expected answer? Question: "${question}" Expected Answer (ground truth): """ ${expectedAnswer} """ Retrieved Context: """ ${context} """ Evaluate recall (how complete is the context): - 1.0 = Context contains ALL information from expected answer - 0.7-0.9 = Context contains most information, minor gaps - 0.4-0.6 = Context missing significant information - 0.0-0.3 = Context missing most/all key information Output ONLY a JSON object in this exact format: { "score": 0.85, "reasoning": "What information is present vs missing from context" }`; } parseScore(response) { try { const jsonMatch = response.match(/\{[\s\S]*\}/); if (jsonMatch) { const parsed = JSON.parse(jsonMatch[0]); if (typeof parsed.score === "number") { return Math.max(0, Math.min(1, parsed.score)); } } const numberMatch = response.match(/\b0?\.\d+\b|\b1\.0\b|\b[01]\b/); if (numberMatch) { return Math.max(0, Math.min(1, parseFloat(numberMatch[0]))); } console.warn("Could not parse score from response:", response); return 0.5; } catch (error) { console.error("Error parsing score:", error); return 0.5; } } } exports.ContextualRecallEval = ContextualRecallEval;