@dooor-ai/toolkit
Guards, Evals & Observability for AI applications - works seamlessly with LangChain/LangGraph
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.ContextualRecallEval = void 0;
const base_1 = require("./base");
const cortexdb_client_1 = require("../observability/cortexdb-client");
/**
 * ContextualRecallEval - Measures whether the retrieval system found all relevant information
*
* Evaluates whether the retrieved context contains all the information needed to
* answer the question correctly. High recall means nothing important was missed.
*
* Example:
* ```typescript
 * const recallEval = new ContextualRecallEval({
* threshold: 0.8,
 * expectedAnswer: "Paris is the capital and was founded in the 3rd century BC.",
* context: "Paris is the capital of France."
* });
 * const result = await recallEval.evaluate(
* "What is the capital of France and when was it founded?",
* "The capital is Paris."
* );
 * // result.score ≈ 0.5 (founding date missing from context), result.passed = false
* ```
*/
class ContextualRecallEval extends base_1.Eval {
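    /**
     * @param config Optional eval settings: a pass/fail threshold plus static
     * `expectedAnswer` and `context`. Both values may instead be supplied
     * per call through the `metadata` argument of `evaluate`.
     */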
constructor(config = {}) {
super(config);
this.expectedAnswer = config.expectedAnswer;
this.context = config.context;
}
get name() {
return "ContextualRecallEval";
}
/**
* Set expected answer dynamically
*/
setExpectedAnswer(answer) {
this.expectedAnswer = answer;
}
/**
* Set context dynamically
*/
setContext(context) {
this.context = context;
}
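    /**
     * Run the recall evaluation for one question/answer pair.
     *
     * Resolves the context and ground truth from the constructor config first,
     * then falls back to `metadata.context`/`metadata.retrievedDocs` and
     * `metadata.expectedAnswer`/`metadata.groundTruth`. If either is still
     * missing, it returns a neutral 0.5 score with `passed: false` rather than
     * throwing, so a misconfigured eval cannot crash the calling pipeline.
     */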
async evaluate(input, output, metadata) {
const startTime = Date.now();
const context = this.context || metadata?.context || metadata?.retrievedDocs;
const expected = this.expectedAnswer || metadata?.expectedAnswer || metadata?.groundTruth;
if (!context || !expected) {
return {
name: this.name,
score: 0.5,
passed: false,
details: "Missing required data for contextual recall. Provide 'context' and 'expectedAnswer' via config or metadata.",
metadata: {
latency: Date.now() - startTime,
},
timestamp: new Date(),
};
}
try {
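            // Judge with the globally configured CortexDB client; temperature 0
            // keeps scoring as deterministic as the provider allows.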
const cortexClient = (0, cortexdb_client_1.getCortexDBClient)();
const providerName = (0, cortexdb_client_1.getGlobalProviderName)();
const prompt = this.buildPrompt(input, expected, context);
const response = await cortexClient.invokeAI({
prompt,
usage: "evaluation",
providerName: providerName || undefined,
temperature: 0.0,
maxTokens: 300,
});
const score = this.parseScore(response.text);
const passed = score >= this.getThreshold();
return {
name: this.name,
score,
passed,
details: `Contextual recall score: ${score.toFixed(2)}. ${passed ? "PASSED" : "FAILED"} (threshold: ${this.getThreshold()})`,
metadata: {
latency: Date.now() - startTime,
judgeResponse: response.text,
contextLength: context.length,
},
timestamp: new Date(),
};
}
catch (error) {
console.error("ContextualRecallEval failed:", error);
return {
name: this.name,
score: 0.5,
passed: false,
details: `Eval failed: ${error instanceof Error ? error.message : "Unknown error"}`,
metadata: {
error: String(error),
latency: Date.now() - startTime,
},
timestamp: new Date(),
};
}
}
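    /**
     * Build the LLM-as-judge prompt: the judge compares the retrieved context
     * against the ground-truth answer and must reply with a JSON object
     * containing a 0-1 recall score and a short reasoning string.
     */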
buildPrompt(question, expectedAnswer, context) {
return `You are an expert evaluator. Your task is to measure RECALL: did the retrieved context contain all the information needed to produce the expected answer?
Question: "${question}"
Expected Answer (ground truth): """
${expectedAnswer}
"""
Retrieved Context: """
${context}
"""
Evaluate recall (how complete is the context):
- 1.0 = Context contains ALL information from expected answer
- 0.7-0.9 = Context contains most information, minor gaps
- 0.4-0.6 = Context missing significant information
- 0.0-0.3 = Context missing most/all key information
Output ONLY a JSON object in this exact format:
{
"score": 0.85,
"reasoning": "What information is present vs missing from context"
}`;
}
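    /**
     * Extract a numeric score from the judge's reply. Tries strict JSON first,
     * then falls back to the first bare number between 0 and 1, and finally to
     * a neutral 0.5 when nothing parseable is found. Parsed values are clamped
     * to the [0, 1] range.
     */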
parseScore(response) {
try {
const jsonMatch = response.match(/\{[\s\S]*\}/);
if (jsonMatch) {
const parsed = JSON.parse(jsonMatch[0]);
if (typeof parsed.score === "number") {
return Math.max(0, Math.min(1, parsed.score));
}
}
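            // Fallback: the judge ignored the JSON format instruction, so grab
            // the first bare number in [0, 1] (e.g. "0.85" or "1.0").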
const numberMatch = response.match(/\b0?\.\d+\b|\b1\.0\b|\b[01]\b/);
if (numberMatch) {
return Math.max(0, Math.min(1, parseFloat(numberMatch[0])));
}
console.warn("Could not parse score from response:", response);
return 0.5;
}
catch (error) {
console.error("Error parsing score:", error);
return 0.5;
}
}
}
exports.ContextualRecallEval = ContextualRecallEval;
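/**
 * Usage sketch (illustrative only, not part of the compiled module). It
 * assumes a CortexDB client and provider have already been registered
 * globally, since `evaluate` resolves its judge via `getCortexDBClient()`.
 * Per-call data is passed through `metadata` here; the constructor config
 * shown in the class docblock works the same way.
 *
 * ```typescript
 * const recall = new ContextualRecallEval({ threshold: 0.8 });
 * const result = await recall.evaluate(
 *   "What is the capital of France and when was it founded?",
 *   "The capital is Paris.",
 *   {
 *     context: "Paris is the capital of France.",
 *     groundTruth: "Paris is the capital and was founded in the 3rd century BC.",
 *   }
 * );
 * if (!result.passed) console.warn(result.details);
 * ```
 */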