@dooor-ai/toolkit
Guards, Evals & Observability for AI applications - works seamlessly with LangChain/LangGraph
import { Eval } from "./base";
import { EvalResult, EvalConfig } from "../core/types";
import { getCortexDBClient, getGlobalProviderName } from "../observability/cortexdb-client";
export interface HallucinationConfig extends EvalConfig {
/** Context to ground claims against (optional; without it, the eval checks internal consistency and factual plausibility) */
context?: string;
}
/**
* HallucinationEval - Detects when an LLM fabricates facts or makes unsupported claims
*
* If context is provided, checks if answer contains claims not in context.
* If no context, evaluates internal consistency and factual plausibility.
*
* Example:
* ```typescript
* const hallucinationEval = new HallucinationEval({
*   threshold: 0.8,
*   context: "The Eiffel Tower is 330 meters tall."
* });
* const result = await hallucinationEval.evaluate(
* "How tall is the Eiffel Tower?",
* "The Eiffel Tower is 450 meters tall and was built in 1950."
* );
* // result.score = 0.2 (low score means heavy hallucination), result.passed = false
* ```
*/
export class HallucinationEval extends Eval {
private context?: string;
constructor(config: HallucinationConfig = {}) {
super(config);
this.context = config.context;
}
get name(): string {
return "HallucinationEval";
}
/**
* Set or replace the grounding context at runtime (e.g., after each retrieval step)
*/
setContext(context: string): void {
this.context = context;
}
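/**
 * Judge `output` against the question and any available context.
 * Context resolution order: constructor/setContext value first, then
 * `metadata.context`, then `metadata.retrievedDocs`.
 */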
async evaluate(
input: string,
output: string,
metadata?: Record<string, any>
): Promise<EvalResult> {
const startTime = Date.now();
const context = this.context || metadata?.context || metadata?.retrievedDocs;
try {
const cortexClient = getCortexDBClient();
const providerName = getGlobalProviderName();
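// Grounded check when context is available; otherwise a plausibility-only check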
const prompt = context
? this.buildPromptWithContext(input, output, context)
: this.buildPromptWithoutContext(input, output);
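// Deterministic judge call: temperature 0, with a small token budget for the JSON verdict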
const response = await cortexClient.invokeAI({
prompt,
usage: "evaluation",
providerName: providerName || undefined,
temperature: 0.0,
maxTokens: 300,
});
const score = this.parseScore(response.text);
const passed = score >= this.getThreshold();
return {
name: this.name,
score,
passed,
details: `Hallucination score: ${score.toFixed(2)} (higher is better). ${passed ? "PASSED" : "FAILED"} (threshold: ${this.getThreshold()})`,
metadata: {
latency: Date.now() - startTime,
judgeResponse: response.text,
hasContext: !!context,
},
timestamp: new Date(),
};
} catch (error) {
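// Fail closed: report a neutral score, mark the eval as not passed, and record the error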
console.error("HallucinationEval failed:", error);
return {
name: this.name,
score: 0.5,
passed: false,
details: `Eval failed: ${error instanceof Error ? error.message : "Unknown error"}`,
metadata: {
error: String(error),
latency: Date.now() - startTime,
},
timestamp: new Date(),
};
}
}
private buildPromptWithContext(question: string, answer: string, context: string): string {
return `You are an expert evaluator. Your task is to detect HALLUCINATIONS: claims in the answer that are NOT supported by the context.
Context: """
${context}
"""
Question: "${question}"
Answer: "${answer}"
Evaluate hallucination (inverse scale):
- 1.0 = Zero hallucinations, all claims are grounded
- 0.7-0.9 = Minor unsupported details
- 0.4-0.6 = Some significant hallucinations
- 0.0-0.3 = Major hallucinations, answer is mostly fabricated
Output ONLY a JSON object in this exact format:
{
"score": 0.9,
"reasoning": "What claims are supported vs hallucinated"
}`;
}
private buildPromptWithoutContext(question: string, answer: string): string {
return `You are an expert evaluator. Your task is to detect HALLUCINATIONS: claims in the answer that are factually implausible or internally inconsistent.
Question: "${question}"
Answer: "${answer}"
Evaluate factual plausibility:
- 1.0 = All claims seem factually plausible and consistent
- 0.7-0.9 = Mostly plausible, minor inconsistencies
- 0.4-0.6 = Some implausible or contradictory claims
- 0.0-0.3 = Major factual errors or contradictions
Output ONLY a JSON object in this exact format:
{
"score": 0.85,
"reasoning": "Assessment of factual plausibility"
}`;
}
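/**
 * Extract a 0-1 score from the judge's raw text: prefer an embedded
 * JSON object, fall back to a bare number, and default to a neutral 0.5.
 */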
private parseScore(response: string): number {
try {
const jsonMatch = response.match(/\{[\s\S]*\}/);
if (jsonMatch) {
const parsed = JSON.parse(jsonMatch[0]);
if (typeof parsed.score === "number") {
return Math.max(0, Math.min(1, parsed.score));
}
}
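// Fallback: pull a bare 0-1 number out of the text if no parseable JSON was found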
const numberMatch = response.match(/\b(?:0?\.\d+|1(?:\.0+)?|0)\b/);
if (numberMatch) {
return Math.max(0, Math.min(1, parseFloat(numberMatch[0])));
}
console.warn("Could not parse score from response:", response);
return 0.5;
} catch (error) {
console.error("Error parsing score:", error);
return 0.5;
}
}
}
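// --- Usage sketch (illustrative, not part of this file) ---
// Assumes the toolkit's CortexDB client and provider are already configured,
// and `retrieveDocs` is a hypothetical RAG helper returning context strings.
//
// const evaluator = new HallucinationEval({ threshold: 0.8 });
// const question = "How tall is the Eiffel Tower?";
// const docs = await retrieveDocs(question);  // hypothetical retrieval step
// evaluator.setContext(docs.join("\n"));      // or pass { context } via metadata
// const result = await evaluator.evaluate(question, modelAnswer);
// if (!result.passed) console.warn(result.details);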