@vezlo/ai-validator
AI Response Validator - Automated accuracy checking, hallucination prevention, and confidence scoring for AI responses
JavaScript
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.HallucinationDetector = void 0;
const openai_1 = __importDefault(require("openai"));
const sdk_1 = __importDefault(require("@anthropic-ai/sdk"));
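/**
 * Detects hallucinations in an AI response by asking an LLM judge
 * (OpenAI or Anthropic Claude) to compare the response against the
 * supplied source content. Results include a boolean flag, a risk
 * score between 0 and 1, and the parts judged to be hallucinated.
 * Provider clients are only constructed for the API keys that are passed in.
 */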
class HallucinationDetector {
    constructor(openaiApiKey, claudeApiKey) {
        if (openaiApiKey) {
            this.openai = new openai_1.default({ apiKey: openaiApiKey });
        }
        if (claudeApiKey) {
            this.claude = new sdk_1.default({ apiKey: claudeApiKey });
        }
    }
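    /**
     * Compares an AI response against its retrieved sources using the
     * selected provider ('openai' or 'claude'). With no sources the
     * response is flagged as high risk (0.8); if the provider call fails,
     * the check fails closed with detected=true and risk 0.9.
     */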
    async detectHallucination(response, sources, llmProvider, model) {
        // If no sources, high hallucination risk
        if (sources.length === 0) {
            return {
                detected: true,
                risk: 0.8,
                hallucinated_parts: ['entire_response']
            };
        }
        // Combine all source content
        const sourceContent = sources.map(s => s.content).join('\n\n');
        try {
            if (llmProvider === 'openai' && this.openai) {
                return await this.detectHallucinationWithOpenAI(response, sourceContent, model);
            }
            else if (llmProvider === 'claude' && this.claude) {
                return await this.detectHallucinationWithClaude(response, sourceContent, model);
            }
            else {
                throw new Error(`LLM provider ${llmProvider} not available`);
            }
        }
        catch (error) {
            console.error('Hallucination detection error:', error);
            return {
                detected: true,
                risk: 0.9,
                hallucinated_parts: ['hallucination_check_failed']
            };
        }
    }
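    /**
     * Leniently parses the judge model's reply as JSON: tries the raw
     * string first, then retries after stripping markdown code fences,
     * and finally falls back to an empty object.
     */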
    parseLLMResponse(content) {
        try {
            // Try to parse as-is first
            return JSON.parse(content);
        }
        catch {
            try {
                // Remove markdown code blocks if present
                const cleaned = content.replace(/```json\s*/g, '').replace(/```\s*/g, '').trim();
                return JSON.parse(cleaned);
            }
            catch {
                // If all else fails, return empty object
                console.warn('Failed to parse LLM response as JSON:', content);
                return {};
            }
        }
    }
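    /**
     * Runs the judge prompt through the OpenAI Chat Completions API
     * (defaults to gpt-4o) with temperature 0 and JSON-mode output.
     */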
    async detectHallucinationWithOpenAI(response, sourceContent, model) {
        const prompt = `You are a strict hallucination detector. Compare the AI response against the sources.
**Sources:**
${sourceContent}
**AI Response:**
${response}
**Critical Rules:**
1. If the response discusses topics/concepts NOT in the sources → HALLUCINATED
2. If the response contains facts NOT mentioned in sources → HALLUCINATED
3. If response contradicts sources → HALLUCINATED
4. Only if response uses information FROM the sources (even partially) → NOT hallucinated
**Examples:**
✓ Source: "GPT is a language model", Response: "GPT is a language model" → NOT hallucinated
✓ Source: "GPT is a language model", Response: "GPT is a model" → NOT hallucinated (partial)
✗ Source: "GPT is a language model", Response: "Machine learning uses algorithms" → HALLUCINATED (different topic)
✗ Source: "GPT uses transformers", Response: "GPT uses neural networks" → HALLUCINATED (not in source)
You MUST respond with this exact JSON structure:
{"detected": true/false, "risk": 0.0-1.0, "hallucinated_parts": []}
Response:`;
        console.log('→ Hallucination Detection (OpenAI -', (model || 'gpt-4o') + ')');
        const completion = await this.openai.chat.completions.create({
            model: model || 'gpt-4o',
            messages: [
                { role: 'system', content: 'You are a hallucination detector. Always respond with valid JSON only.' },
                { role: 'user', content: prompt }
            ],
            temperature: 0.0,
            max_tokens: 500,
            response_format: { type: 'json_object' }
        });
        const rawResponse = completion.choices[0].message.content || '{}';
        const result = this.parseLLMResponse(rawResponse);
        console.log(' ✓ Detected:', result.detected, '| Risk:', result.risk);
        return {
            detected: result.detected || false,
            risk: result.risk || 0,
            hallucinated_parts: result.hallucinated_parts || []
        };
    }
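    /**
     * Runs the same judge prompt through the Anthropic Messages API
     * (defaults to claude-sonnet-4-5-20250929) with temperature 0.
     */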
    async detectHallucinationWithClaude(response, sourceContent, model) {
        const prompt = `You are a strict hallucination detector. Compare the AI response against the sources.
**Sources:**
${sourceContent}
**AI Response:**
${response}
**Critical Rules:**
1. If the response discusses topics/concepts NOT in the sources → HALLUCINATED
2. If the response contains facts NOT mentioned in sources → HALLUCINATED
3. If response contradicts sources → HALLUCINATED
4. Only if response uses information FROM the sources (even partially) → NOT hallucinated
**Examples:**
✓ Source: "GPT is a language model", Response: "GPT is a language model" → NOT hallucinated
✓ Source: "GPT is a language model", Response: "GPT is a model" → NOT hallucinated (partial)
✗ Source: "GPT is a language model", Response: "Machine learning uses algorithms" → HALLUCINATED (different topic)
✗ Source: "GPT uses transformers", Response: "GPT uses neural networks" → HALLUCINATED (not in source)
You MUST respond with this exact JSON structure:
{"detected": true/false, "risk": 0.0-1.0, "hallucinated_parts": []}
Response:`;
        console.log('→ Hallucination Detection (Claude -', (model || 'claude-sonnet-4-5-20250929') + ')');
        const message = await this.claude.messages.create({
            model: model || 'claude-sonnet-4-5-20250929',
            max_tokens: 500,
            temperature: 0.0,
            system: 'You are a hallucination detector. Always respond with valid JSON only. Never include markdown formatting or code blocks.',
            messages: [{ role: 'user', content: prompt }]
        });
        const rawResponse = message.content[0].type === 'text' ? message.content[0].text : '{}';
        const result = this.parseLLMResponse(rawResponse);
        console.log(' ✓ Detected:', result.detected, '| Risk:', result.risk);
        return {
            detected: result.detected || false,
            risk: result.risk || 0,
            hallucinated_parts: result.hallucinated_parts || []
        };
    }
}
exports.HallucinationDetector = HallucinationDetector;
//# sourceMappingURL=HallucinationDetector.js.map
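Usage sketch (not part of the published file): a minimal example of how this class might be consumed from Node.js. The import path from the package root, the environment variable names, and the shape of the sources array ({ content: string }) are assumptions inferred from the constructor and method signatures above, not documented API.

// usage-example.js (illustrative sketch; assumes HallucinationDetector is
// re-exported from the package's root entry point)
const { HallucinationDetector } = require('@vezlo/ai-validator');

async function main() {
    // API keys are optional per provider; pass only the ones you intend to use.
    const detector = new HallucinationDetector(process.env.OPENAI_API_KEY, process.env.ANTHROPIC_API_KEY);

    // Each source is assumed to expose a `content` string, per sources.map(s => s.content) above.
    const sources = [
        { content: 'GPT is a language model developed by OpenAI.' }
    ];

    // llmProvider must be 'openai' or 'claude' and match a configured client;
    // the model argument is optional (defaults to gpt-4o / claude-sonnet-4-5-20250929).
    const result = await detector.detectHallucination('GPT is a language model.', sources, 'openai');

    console.log(result); // e.g. { detected: false, risk: 0.1, hallucinated_parts: [] }
}

main().catch(console.error);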