llmverify
Version:
AI Output Verification Toolkit — Local-first LLM safety, hallucination detection, PII redaction, prompt injection defense, and runtime monitoring. Zero telemetry. OWASP LLM Top 10 aligned.
111 lines • 14.4 kB
JavaScript
;
/**
* Hallucination Engine
*
* Identifies risk indicators in AI outputs.
* Does NOT definitively detect hallucinations - that requires ground truth.
*
* @module engines/hallucination
* @author Haiec
* @license MIT
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.HallucinationEngine = void 0;
const claim_extractor_1 = require("./claim-extractor");
const risk_analyzer_1 = require("./risk-analyzer");
/**
 * Heuristic engine that scores AI output for hallucination *risk indicators*.
 *
 * It extracts claims from the content, annotates each with risk indicators
 * (via the sibling `claim-extractor` and `risk-analyzer` modules) and
 * aggregates them into a report. It does not verify facts.
 */
class HallucinationEngine {
    /**
     * @param {object} config - Engine configuration; stored as-is and forwarded
     *   to `analyzeRiskIndicators` for every claim.
     */
    constructor(config) {
        this.config = config;
        // Static caveats attached verbatim to every detection result.
        this.LIMITATIONS = [
            'Pattern-based detection only (free tier)',
            'Cannot verify factual accuracy without ground truth sources',
            'English language only',
            'Context-dependent false positives possible',
            'Novel phrasings may not be detected',
            'Requires human validation for production use'
        ];
        // Human-readable description of the approach, attached to every result.
        this.METHODOLOGY = 'Risk indicator identification using linguistic pattern analysis. ' +
            'Analyzes claim specificity, citation presence, language patterns, and ' +
            'contradiction signals. Does NOT verify factual accuracy. ' +
            'Results indicate likelihood that claims need human review.';
    }

    /**
     * Run the full analysis pipeline on a piece of content.
     *
     * @param {string} content - Text to analyze (passed straight to `extractClaims`).
     * @returns {Promise<object>} Report with `claims`, `suspiciousClaims`,
     *   `riskScore`, `confidence`, `riskIndicators`, `limitations` and `methodology`.
     */
    async detect(content) {
        // Step 1: Extract claims
        let claims = (0, claim_extractor_1.extractClaims)(content);
        // Step 2: Analyze risk indicators for each claim
        claims = claims.map((claim) => (0, risk_analyzer_1.analyzeRiskIndicators)(claim, this.config));
        // Step 3: Check for contradictions
        claims = (0, risk_analyzer_1.checkContradictions)(claims);
        // Step 4: A claim is "suspicious" when its weighted risk exceeds 0.5
        const suspiciousClaims = claims.filter((claim) => this.calculateClaimRisk(claim) > 0.5);
        // Step 5: Overall risk score (mean per-claim risk)
        const riskScore = this.calculateRiskScore(claims);
        // Step 6: Per-indicator summary
        const riskIndicators = this.summarizeRiskIndicators(claims);
        // Step 7: Sample-size based confidence
        const confidence = this.calculateConfidence(claims);
        return {
            claims,
            suspiciousClaims,
            riskScore,
            confidence,
            riskIndicators,
            limitations: this.LIMITATIONS,
            methodology: this.METHODOLOGY
        };
    }

    /**
     * Weighted risk of a single claim.
     *
     * Weights: `lackOfSpecificity` (numeric) x 0.3, plus 0.3 for a missing
     * citation, 0.2 for vague language and 0.2 for a contradiction signal.
     *
     * @param {object} claim - Claim carrying a `riskIndicators` object.
     * @returns {number} Weighted risk for the claim.
     */
    calculateClaimRisk(claim) {
        return (claim.riskIndicators.lackOfSpecificity * 0.3 +
            (claim.riskIndicators.missingCitation ? 0.3 : 0) +
            (claim.riskIndicators.vagueLanguage ? 0.2 : 0) +
            (claim.riskIndicators.contradictionSignal ? 0.2 : 0));
    }

    /**
     * Mean per-claim risk, capped at 1.0.
     *
     * @param {object[]} claims - Analyzed claims.
     * @returns {number} 0 when there are no claims, otherwise the capped mean.
     */
    calculateRiskScore(claims) {
        if (claims.length === 0) {
            return 0;
        }
        const totalRisk = claims.reduce((sum, claim) => sum + this.calculateClaimRisk(claim), 0);
        return Math.min(totalRisk / claims.length, 1.0);
    }

    /**
     * Summarize the individual indicators across all claims.
     *
     * `lackOfSpecificity` is an average; the other three fields are counts of
     * claims for which the corresponding flag is truthy.
     *
     * @param {object[]} claims - Analyzed claims.
     * @returns {{lackOfSpecificity: number, missingCitations: number,
     *   vagueLanguage: number, contradictionSignals: number}}
     */
    summarizeRiskIndicators(claims) {
        if (claims.length === 0) {
            return {
                lackOfSpecificity: 0,
                missingCitations: 0,
                vagueLanguage: 0,
                contradictionSignals: 0
            };
        }
        return {
            lackOfSpecificity: claims.reduce((sum, c) => sum + c.riskIndicators.lackOfSpecificity, 0) / claims.length,
            missingCitations: claims.filter((c) => c.riskIndicators.missingCitation).length,
            vagueLanguage: claims.filter((c) => c.riskIndicators.vagueLanguage).length,
            contradictionSignals: claims.filter((c) => c.riskIndicators.contradictionSignal).length
        };
    }

    /**
     * Heuristic confidence derived purely from the number of claims.
     *
     * The raw score grows linearly up to 1.0 at 10 claims and is halved when
     * fewer than 3 claims are available. The reported `value` is floored at 0.3.
     * The interval is `value +/- 0.2 * (1 - value)`, clamped to [0, 1].
     *
     * The interval is built around the floored `value`, so the point estimate
     * always lies inside its own interval. Previously it was built around the
     * un-floored score, which for 0-2 claims yielded e.g. value 0.3 with
     * interval [0, 0.2].
     *
     * @param {object[]} claims - Analyzed claims (only `length` is read).
     * @returns {{value: number, interval: number[], method: string, factors: object}}
     */
    calculateConfidence(claims) {
        const sampleSize = claims.length;
        const baseLine = Math.min(sampleSize / 10, 1.0);
        const rawValue = sampleSize < 3 ? baseLine * 0.5 : baseLine;
        const value = Math.max(0.3, rawValue);
        const margin = 0.2 * (1 - value);
        return {
            value,
            interval: [
                Math.max(0, value - margin),
                Math.min(1, value + margin)
            ],
            method: 'heuristic',
            factors: {
                // Factors keep reporting the un-floored score, as before.
                patternStrength: rawValue,
                contextClarity: rawValue,
                historicalAccuracy: 0.7
            }
        };
    }
}
// Publish the class as a named CommonJS export.
exports.HallucinationEngine = HallucinationEngine;
//# sourceMappingURL=data:application/json;base64,