UNPKG

llmverify

Version:

AI Output Verification Toolkit — Local-first LLM safety, hallucination detection, PII redaction, prompt injection defense, and runtime monitoring. Zero telemetry. OWASP LLM Top 10 aligned.

111 lines 14.4 kB
"use strict";
/**
 * Hallucination Engine
 *
 * Identifies risk indicators in AI outputs.
 * Does NOT definitively detect hallucinations - that requires ground truth.
 *
 * @module engines/hallucination
 * @author Haiec
 * @license MIT
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.HallucinationEngine = void 0;
const claim_extractor_1 = require("./claim-extractor");
const risk_analyzer_1 = require("./risk-analyzer");
/**
 * Scores text for hallucination *risk indicators*.
 *
 * Claims are extracted from the content, annotated with risk indicators,
 * cross-checked for contradictions, and then aggregated into an overall
 * risk score, a per-indicator summary and a heuristic confidence estimate.
 */
class HallucinationEngine {
    /**
     * @param {object} config - Engine configuration. Stored on the instance
     *   and forwarded unchanged to `analyzeRiskIndicators` for every claim.
     */
    constructor(config) {
        this.config = config;
        // Caveats attached verbatim to every detection result.
        this.LIMITATIONS = [
            'Pattern-based detection only (free tier)',
            'Cannot verify factual accuracy without ground truth sources',
            'English language only',
            'Context-dependent false positives possible',
            'Novel phrasings may not be detected',
            'Requires human validation for production use'
        ];
        // Human-readable description of the approach, attached to every result.
        this.METHODOLOGY = 'Risk indicator identification using linguistic pattern analysis. ' +
            'Analyzes claim specificity, citation presence, language patterns, and ' +
            'contradiction signals. Does NOT verify factual accuracy. ' +
            'Results indicate likelihood that claims need human review.';
    }
    /**
     * Runs the full risk-indicator pipeline over `content`.
     *
     * The method is `async` but awaits nothing, so the returned promise
     * resolves as soon as the synchronous pipeline completes.
     *
     * @param {*} content - Passed as-is to `extractClaims`.
     * @returns {Promise<object>} Resolves to `{ claims, suspiciousClaims,
     *   riskScore, confidence, riskIndicators, limitations, methodology }`.
     */
    async detect(content) {
        // Step 1: Extract claims
        let claims = (0, claim_extractor_1.extractClaims)(content);
        // Step 2: Analyze risk indicators for each claim
        claims = claims.map(claim => (0, risk_analyzer_1.analyzeRiskIndicators)(claim, this.config));
        // Step 3: Check for contradictions
        claims = (0, risk_analyzer_1.checkContradictions)(claims);
        // Step 4: Identify suspicious claims (weighted risk strictly above 0.5)
        const suspiciousClaims = claims.filter(claim => {
            const totalRisk = this.calculateClaimRisk(claim);
            return totalRisk > 0.5;
        });
        // Step 5: Calculate overall risk score
        const riskScore = this.calculateRiskScore(claims);
        // Step 6: Calculate risk indicators summary
        const riskIndicators = this.summarizeRiskIndicators(claims);
        // Step 7: Calculate confidence
        const confidence = this.calculateConfidence(claims);
        return {
            claims,
            suspiciousClaims,
            riskScore,
            confidence,
            riskIndicators,
            limitations: this.LIMITATIONS,
            methodology: this.METHODOLOGY
        };
    }
    /**
     * Weighted risk of a single claim.
     *
     * Weights: `lackOfSpecificity` x 0.3 (numeric), `missingCitation` 0.3,
     * `vagueLanguage` 0.2, `contradictionSignal` 0.2 (each added when truthy).
     * NOTE(review): the sum stays within [0, 1] only if `lackOfSpecificity`
     * is itself within [0, 1] - confirm against the risk analyzer.
     *
     * @param {object} claim - Claim carrying a `riskIndicators` object.
     * @returns {number} Weighted risk for the claim.
     */
    calculateClaimRisk(claim) {
        return (claim.riskIndicators.lackOfSpecificity * 0.3 +
            (claim.riskIndicators.missingCitation ? 0.3 : 0) +
            (claim.riskIndicators.vagueLanguage ? 0.2 : 0) +
            (claim.riskIndicators.contradictionSignal ? 0.2 : 0));
    }
    /**
     * Mean per-claim risk, capped at 1.0.
     *
     * @param {object[]} claims - Analyzed claims.
     * @returns {number} 0 when there are no claims, otherwise the capped mean.
     */
    calculateRiskScore(claims) {
        if (claims.length === 0)
            return 0;
        const totalRisk = claims.reduce((sum, claim) => {
            return sum + this.calculateClaimRisk(claim);
        }, 0);
        return Math.min(totalRisk / claims.length, 1.0);
    }
    /**
     * Aggregates indicators across all claims.
     *
     * `lackOfSpecificity` is the mean of the per-claim values; the other
     * three fields are counts of claims with that indicator set.
     *
     * @param {object[]} claims - Analyzed claims.
     * @returns {{lackOfSpecificity: number, missingCitations: number,
     *   vagueLanguage: number, contradictionSignals: number}}
     */
    summarizeRiskIndicators(claims) {
        // Short-circuit so the mean below never divides by zero.
        if (claims.length === 0) {
            return {
                lackOfSpecificity: 0,
                missingCitations: 0,
                vagueLanguage: 0,
                contradictionSignals: 0
            };
        }
        return {
            lackOfSpecificity: claims.reduce((sum, c) => sum + c.riskIndicators.lackOfSpecificity, 0) / claims.length,
            missingCitations: claims.filter(c => c.riskIndicators.missingCitation).length,
            vagueLanguage: claims.filter(c => c.riskIndicators.vagueLanguage).length,
            contradictionSignals: claims.filter(c => c.riskIndicators.contradictionSignal).length
        };
    }
    /**
     * Heuristic confidence derived solely from the number of claims.
     *
     * The raw score grows linearly with the claim count, saturating at
     * 10 claims, and is halved when fewer than 3 claims are available.
     * The reported value is floored at 0.3.
     *
     * @param {object[]} claims - Analyzed claims.
     * @returns {{value: number, interval: number[], method: string,
     *   factors: object}} Confidence value, its [low, high] interval
     *   clamped to [0, 1], the method tag and the contributing factors.
     */
    calculateConfidence(claims) {
        const sampleSize = claims.length;
        const baseLine = Math.min(sampleSize / 10, 1.0);
        const rawValue = sampleSize < 3 ? baseLine * 0.5 : baseLine;
        // Apply the floor BEFORE deriving the interval so that the reported
        // value always lies inside its own interval. Deriving the interval
        // from the un-floored score gave e.g. value 0.3 with interval
        // [0, 0.2] for an empty claim list. For 3+ claims the raw score is
        // already >= 0.3, so the result is unchanged.
        const value = Math.max(0.3, rawValue);
        const margin = 0.2 * (1 - value);
        return {
            value,
            interval: [
                Math.max(0, value - margin),
                Math.min(1, value + margin)
            ],
            method: 'heuristic',
            factors: {
                // Factors keep the un-floored score.
                patternStrength: rawValue,
                contextClarity: rawValue,
                historicalAccuracy: 0.7
            }
        };
    }
}
exports.HallucinationEngine = HallucinationEngine;
//# sourceMappingURL=data:application/json;base64,