@dooor-ai/toolkit
Guards, Evals & Observability for AI applications - works seamlessly with LangChain/LangGraph
import { Eval } from "./base";
import { EvalResult, EvalConfig } from "../core/types";
import { getCortexDBClient, getGlobalProviderName } from "../observability/cortexdb-client";
export interface HallucinationConfig extends EvalConfig {
/** Context to ground claims against (optional; without it, the eval checks internal consistency and factual plausibility) */
context?: string;
}
/**
* HallucinationEval - Detects when an LLM fabricates facts or makes unsupported claims
*
* If context is provided, checks if answer contains claims not in context.
* If no context, evaluates internal consistency and factual plausibility.
*
* Example:
* ```typescript
* const hallucinationEval = new HallucinationEval({
*   threshold: 0.8,
*   context: "The Eiffel Tower is 330 meters tall."
* });
* const result = await hallucinationEval.evaluate(
* "How tall is the Eiffel Tower?",
* "The Eiffel Tower is 450 meters tall and was built in 1950."
* );
* // result.score = 0.2 (low score means heavy hallucination), result.passed = false
* ```
*/
export class HallucinationEval extends Eval {
private context?: string;
constructor(config: HallucinationConfig = {}) {
super(config);
this.context = config.context;
}
get name(): string {
return "HallucinationEval";
}
/**
* Set or replace the grounding context at runtime (e.g., after each retrieval step)
*/
setContext(context: string): void {
this.context = context;
}
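/**
 * Judge `output` against the question and any available context.
 * Context resolution order: constructor/setContext value first, then
 * `metadata.context`, then `metadata.retrievedDocs`.
 */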
async evaluate(
input: string,
output: string,
metadata?: Record<string, any>
): Promise<EvalResult> {
const startTime = Date.now();
const context = this.context || metadata?.context || metadata?.retrievedDocs;
try {
const cortexClient = getCortexDBClient();
const providerName = getGlobalProviderName();
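// Grounded check when context is available; otherwise a plausibility-only check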
const prompt = context
? this.buildPromptWithContext(input, output, context)
: this.buildPromptWithoutContext(input, output);
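// Deterministic judge call: temperature 0, with a small token budget for the JSON verdict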
const response = await cortexClient.invokeAI({
prompt,
usage: "evaluation",
providerName: providerName || undefined,
temperature: 0.0,
maxTokens: 300,
});
const score = this.parseScore(response.text);
const passed = score >= this.getThreshold();
return {
name: this.name,
score,
passed,
details: `Hallucination score: ${score.toFixed(2)} (higher is better). ${passed ? "PASSED" : "FAILED"} (threshold: ${this.getThreshold()})`,
metadata: {
latency: Date.now() - startTime,
judgeResponse: response.text,
hasContext: !!context,
},
timestamp: new Date(),
};
} catch (error) {
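// Fail closed: report a neutral score, mark the eval as not passed, and record the error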
console.error("HallucinationEval failed:", error);
return {
name: this.name,
score: 0.5,
passed: false,
details: `Eval failed: ${error instanceof Error ? error.message : "Unknown error"}`,
metadata: {
error: String(error),
latency: Date.now() - startTime,
},
timestamp: new Date(),
};
}
}
private buildPromptWithContext(question: string, answer: string, context: string): string {
return `You are an expert evaluator. Your task is to detect HALLUCINATIONS: claims in the answer that are NOT supported by the context.
Context: """
${context}
"""
Question: "${question}"
Answer: "${answer}"
Evaluate hallucination (inverse scale):
- 1.0 = Zero hallucinations, all claims are grounded
- 0.7-0.9 = Minor unsupported details
- 0.4-0.6 = Some significant hallucinations
- 0.0-0.3 = Major hallucinations, answer is mostly fabricated
Output ONLY a JSON object in this exact format:
{
"score": 0.9,
"reasoning": "What claims are supported vs hallucinated"
}`;
}
private buildPromptWithoutContext(question: string, answer: string): string {
return `You are an expert evaluator. Your task is to detect HALLUCINATIONS: claims in the answer that are factually implausible or internally inconsistent.
Question: "${question}"
Answer: "${answer}"
Evaluate factual plausibility:
- 1.0 = All claims seem factually plausible and consistent
- 0.7-0.9 = Mostly plausible, minor inconsistencies
- 0.4-0.6 = Some implausible or contradictory claims
- 0.0-0.3 = Major factual errors or contradictions
Output ONLY a JSON object in this exact format:
{
"score": 0.85,
"reasoning": "Assessment of factual plausibility"
}`;
}
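/**
 * Extract a 0-1 score from the judge's raw text: prefer an embedded
 * JSON object, fall back to a bare number, and default to a neutral 0.5.
 */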
private parseScore(response: string): number {
try {
const jsonMatch = response.match(/\{[\s\S]*\}/);
if (jsonMatch) {
const parsed = JSON.parse(jsonMatch[0]);
if (typeof parsed.score === "number") {
return Math.max(0, Math.min(1, parsed.score));
}
}
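// Fallback: pull a bare 0-1 number out of the text if no parseable JSON was found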
const numberMatch = response.match(/\b(?:0?\.\d+|1(?:\.0+)?|0)\b/);
if (numberMatch) {
return Math.max(0, Math.min(1, parseFloat(numberMatch[0])));
}
console.warn("Could not parse score from response:", response);
return 0.5;
} catch (error) {
console.error("Error parsing score:", error);
return 0.5;
}
}
}
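// --- Usage sketch (illustrative, not part of this file) ---
// Assumes the toolkit's CortexDB client and provider are already configured,
// and `retrieveDocs` is a hypothetical RAG helper returning context strings.
//
// const evaluator = new HallucinationEval({ threshold: 0.8 });
// const question = "How tall is the Eiffel Tower?";
// const docs = await retrieveDocs(question);  // hypothetical retrieval step
// evaluator.setContext(docs.join("\n"));      // or pass { context } via metadata
// const result = await evaluator.evaluate(question, modelAnswer);
// if (!result.passed) console.warn(result.details);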