devflow-ai

Enterprise-grade AI agent orchestration with swarm management UI dashboard
/**
 * Mock Agent Scenarios for False Reporting Detection Tests
 *
 * Tests various scenarios where agents might provide false or misleading reports:
 * - Intentional false claims about task completion
 * - Exaggerated performance improvements
 * - Hidden failures in implementation
 * - Inconsistent reporting between agents
 * - Fabricated test results and evidence
 */

import { jest } from '@jest/globals';
import { EventEmitter } from 'events';
import * as fs from 'fs/promises';
import * as path from 'path';
import * as os from 'os';

// Import verification components
import TruthScoreCalculator from '../../../../.claude/helpers/truth-score.js';

interface MockAgent {
  id: string;
  type: string;
  deceptionLevel: number; // 0 = honest, 1 = completely deceptive
  deceptionStrategy: DeceptionStrategy;
  behaviorPattern: BehaviorPattern;
  reportHistory: AgentReport[];
  detectedAs: 'honest' | 'suspicious' | 'deceptive' | 'unknown';
}

interface DeceptionStrategy {
  type: 'overconfident' | 'fabrication' | 'selective_reporting' | 'gaslighting' | 'collusion';
  parameters: Record<string, any>;
}

interface BehaviorPattern {
  consistency: number;     // 0-1, how consistent the agent's reports are
  timeToReport: number;    // Average time to submit reports (ms)
  detailLevel: number;     // 0-1, how detailed the reports are
  evidenceQuality: number; // 0-1, quality of evidence provided
}

interface AgentReport {
  id: string;
  agentId: string;
  taskId: string;
  claimedOutcome: TaskOutcome;
  evidence: any;
  timestamp: number;
  truthScore?: number;
  verified: boolean;
  conflicts: string[];
}

interface TaskOutcome {
  success: boolean;
  testsPass: boolean;
  noErrors: boolean;
  performance: {
    improvement: number;
    metrics: Record<string, number>;
  };
  quality: {
    codeQuality: number;
    documentation: number;
    maintainability: number;
  };
}

interface VerificationResult {
  reportId: string;
  agentId: string;
  truthScore: number;
  deceptionDetected: boolean;
  deceptionType: string[];
  confidence: number;
  evidence: any;
  recommendations: string[];
}

describe('False Reporting Detection Scenarios', () => {
  let tempDir: string;
  let truthCalculator: any;
  let deceptionDetector: DeceptionDetector;
  let mockAgents: Map<string, MockAgent>;

  beforeEach(async () => {
    tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'false-reporting-test-'));

    // Setup truth score calculator
    truthCalculator = new TruthScoreCalculator();
    truthCalculator.configPath = path.join(tempDir, 'verification.json');
    truthCalculator.memoryPath = path.join(tempDir, 'truth-scores');
    await truthCalculator.init();

    // Setup deception detector
    deceptionDetector = new DeceptionDetector(truthCalculator, tempDir);
    await deceptionDetector.initialize();

    // Create mock agents with different deception strategies
    mockAgents = new Map();
    await createMockAgentScenarios();
  });

  afterEach(async () => {
    await fs.rm(tempDir, { recursive: true, force: true });
  });

  async function createMockAgentScenarios() {
    const agentConfigs = [
      // Honest baseline agent
      {
        id: 'honest-agent-001',
        type: 'coder',
        deceptionLevel: 0,
        deceptionStrategy: { type: 'overconfident', parameters: {} },
        behaviorPattern: { consistency: 0.95, timeToReport: 5000, detailLevel: 0.8, evidenceQuality: 0.9 }
      },
      // Overconfident agent
      {
        id: 'overconfident-agent-002',
        type: 'coder',
        deceptionLevel: 0.3,
        deceptionStrategy: { type: 'overconfident', parameters: { confidenceMultiplier: 1.5, hideMinorIssues: true } },
        behaviorPattern: { consistency: 0.7, timeToReport: 3000, detailLevel: 0.6, evidenceQuality: 0.7 }
      },
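      // Note: the honest baseline above reuses the 'overconfident' strategy type
      // with empty parameters, so applyDeceptionStrategy() leaves its claims
      // untouched. behaviorPattern values feed generateEvidence() and double as
      // detection signals; a timeToReport under the detector's 2000 ms threshold
      // is what later trips `suspiciouslyFastReporting` for the fabricating agent.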
      // Fabricating agent
      {
        id: 'fabricating-agent-003',
        type: 'tester',
        deceptionLevel: 0.8,
        deceptionStrategy: { type: 'fabrication', parameters: { fabricateResults: true, fakePassRate: 0.95 } },
        behaviorPattern: {
          consistency: 0.4,
          timeToReport: 1000,  // Reports very quickly (suspicious)
          detailLevel: 0.9,    // High detail to seem legitimate
          evidenceQuality: 0.3 // Low quality evidence
        }
      },
      // Selective reporting agent
      {
        id: 'selective-agent-004',
        type: 'reviewer',
        deceptionLevel: 0.5,
        deceptionStrategy: { type: 'selective_reporting', parameters: { reportOnlyPositives: true, hideFailures: true } },
        behaviorPattern: { consistency: 0.8, timeToReport: 7000, detailLevel: 0.5, evidenceQuality: 0.6 }
      },
      // Gaslighting agent
      {
        id: 'gaslighting-agent-005',
        type: 'coordinator',
        deceptionLevel: 0.9,
        deceptionStrategy: { type: 'gaslighting', parameters: { contradictOthers: true, rewriteHistory: true } },
        behaviorPattern: { consistency: 0.2, timeToReport: 4000, detailLevel: 0.7, evidenceQuality: 0.4 }
      }
    ];

    for (const config of agentConfigs) {
      const agent = createMockAgent(config);
      mockAgents.set(config.id, agent);
    }
  }

  function createMockAgent(config: any): MockAgent {
    return {
      id: config.id,
      type: config.type,
      deceptionLevel: config.deceptionLevel,
      deceptionStrategy: config.deceptionStrategy,
      behaviorPattern: config.behaviorPattern,
      reportHistory: [],
      detectedAs: 'unknown'
    };
  }

  describe('Overconfident Agent Scenarios', () => {
    test('should detect agent consistently overestimating success rates', async () => {
      const agent = mockAgents.get('overconfident-agent-002')!;
      const reports: AgentReport[] = [];

      // Generate series of overconfident reports
      for (let i = 0; i < 20; i++) {
        const actualSuccess = Math.random() > 0.3;                   // 70% actual success rate
        const claimedSuccess = actualSuccess || Math.random() > 0.2; // Claims 94% success rate

        const report = generateReport(agent, `task-${i}`, {
          actualOutcome: {
            success: actualSuccess,
            testsPass: actualSuccess,
            noErrors: actualSuccess,
            performance: { improvement: Math.random() * 0.2, metrics: {} },
            quality: { codeQuality: Math.random() * 0.3 + 0.4, documentation: 0.5, maintainability: 0.6 }
          },
          claimedOutcome: {
            success: claimedSuccess,
            testsPass: claimedSuccess,
            noErrors: claimedSuccess,
            performance: { improvement: Math.random() * 0.3 + 0.2, metrics: {} }, // Exaggerated
            quality: { codeQuality: Math.random() * 0.2 + 0.8, documentation: 0.9, maintainability: 0.9 }
          }
        });

        reports.push(report);
        agent.reportHistory.push(report);
      }

      // Analyze pattern for deception
      const analysis = await deceptionDetector.analyzeAgentPattern(agent.id, reports);

      expect(analysis.deceptionDetected).toBe(true);
      expect(analysis.deceptionType).toContain('overconfidence');
      expect(analysis.confidence).toBeGreaterThan(0.7);
      expect(analysis.truthScore).toBeLessThan(0.6);

      // Check specific indicators
      expect(analysis.evidence.successRateDiscrepancy).toBeGreaterThan(0.15);
      expect(analysis.evidence.performanceExaggeration).toBeGreaterThan(0.1);
      expect(analysis.recommendations).toContain('Implement additional verification for this agent');
    });
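    // How the flag above fires (see analyzeAgentPattern below): the detector
    // compares the fraction of reports claiming success against the mean truth
    // score. Here about 94% of reports claim success, so once the mean truth
    // score falls under the 0.7 gate, the gap `successClaimRate - avgTruthScore`
    // comfortably exceeds the asserted 0.15 discrepancy threshold.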
    test('should detect pattern of hiding minor issues', async () => {
      const agent = mockAgents.get('overconfident-agent-002')!;
      const reports: AgentReport[] = [];

      // Generate reports where agent hides minor issues
      for (let i = 0; i < 15; i++) {
        const minorIssues = Math.floor(Math.random() * 5) + 1; // 1-5 minor issues
        const majorIssues = Math.random() > 0.8 ? 1 : 0;       // Occasional major issue

        const report = generateReport(agent, `minor-issues-task-${i}`, {
          actualOutcome: {
            success: majorIssues === 0,
            testsPass: true,
            noErrors: false, // Has minor errors
            performance: { improvement: 0.1, metrics: { minorIssues, majorIssues } },
            quality: { codeQuality: 0.7, documentation: 0.6, maintainability: 0.7 }
          },
          claimedOutcome: {
            success: true,
            testsPass: true,
            noErrors: true, // Claims no errors
            performance: { improvement: 0.15, metrics: { minorIssues: 0, majorIssues: 0 } },
            quality: { codeQuality: 0.9, documentation: 0.8, maintainability: 0.9 }
          }
        });

        reports.push(report);
        agent.reportHistory.push(report);
      }

      const analysis = await deceptionDetector.analyzeAgentPattern(agent.id, reports);

      expect(analysis.deceptionDetected).toBe(true);
      expect(analysis.deceptionType).toContain('issue_hiding');
      expect(analysis.evidence.hiddenIssuesCount).toBeGreaterThan(10);
      expect(analysis.recommendations).toContain('Require detailed issue reporting');
    });
  });

  describe('Evidence Fabrication Scenarios', () => {
    test('should detect fabricated test results', async () => {
      const agent = mockAgents.get('fabricating-agent-003')!;
      const reports: AgentReport[] = [];

      // Generate reports with fabricated test results
      for (let i = 0; i < 25; i++) {
        const actualTestResults = {
          total: 100,
          passed: Math.floor(Math.random() * 30) + 60, // 60-90% pass rate
          failed: 0,
          skipped: 0
        };
        actualTestResults.failed = actualTestResults.total - actualTestResults.passed;

        const fabricatedResults = {
          total: 100,
          passed: Math.floor(Math.random() * 5) + 95, // 95-100% pass rate
          failed: 0,
          skipped: 0
        };
        fabricatedResults.failed = fabricatedResults.total - fabricatedResults.passed;

        const report = generateReport(agent, `fabrication-task-${i}`, {
          actualOutcome: {
            success: actualTestResults.passed > 80,
            testsPass: actualTestResults.passed === actualTestResults.total,
            noErrors: false,
            performance: { improvement: 0.05, metrics: actualTestResults },
            quality: { codeQuality: 0.6, documentation: 0.5, maintainability: 0.6 }
          },
          claimedOutcome: {
            success: true,
            testsPass: true,
            noErrors: true,
            performance: { improvement: 0.15, metrics: fabricatedResults },
            quality: { codeQuality: 0.95, documentation: 0.9, maintainability: 0.95 }
          },
          evidence: {
            testResults: fabricatedResults,
            executionTime: Math.random() * 100 + 50, // Suspiciously consistent timing
            testLogs: generateFakeTestLogs(fabricatedResults)
          }
        });

        reports.push(report);
        agent.reportHistory.push(report);
      }

      const analysis = await deceptionDetector.analyzeAgentPattern(agent.id, reports);

      expect(analysis.deceptionDetected).toBe(true);
      expect(analysis.deceptionType).toContain('fabrication');
      expect(analysis.deceptionType).toContain('unrealistic_results');
      expect(analysis.confidence).toBeGreaterThan(0.8);

      // Check fabrication indicators
      expect(analysis.evidence.unrealisticSuccessRate).toBe(true);
      expect(analysis.evidence.suspiciouslyFastReporting).toBe(true);
      expect(analysis.evidence.evidenceInconsistency).toBeGreaterThan(0.5);
    });
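    // Fabrication is flagged on two grounds in analyzeAgentPattern: unrealistic
    // claims (improvement above 50% or code quality above 0.95) and suspicious
    // behavior (mean reporting time under 2000 ms). The fabricating agent's
    // timeToReport of 1000 ms guarantees the second signal here.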
    test('should detect impossible performance improvements', async () => {
      const agent = mockAgents.get('fabricating-agent-003')!;
      const reports: AgentReport[] = [];

      // Generate reports with impossible performance claims
      for (let i = 0; i < 10; i++) {
        const report = generateReport(agent, `impossible-perf-task-${i}`, {
          actualOutcome: {
            success: true,
            testsPass: true,
            noErrors: true,
            performance: {
              improvement: Math.random() * 0.1 + 0.05, // 5-15% actual improvement
              metrics: { responseTime: 1000, throughput: 100, memoryUsage: 500 }
            },
            quality: { codeQuality: 0.7, documentation: 0.6, maintainability: 0.7 }
          },
          claimedOutcome: {
            success: true,
            testsPass: true,
            noErrors: true,
            performance: {
              improvement: Math.random() * 0.5 + 0.8, // 80-130% claimed improvement (impossible)
              metrics: {
                responseTime: 100, // 10x improvement
                throughput: 1000,  // 10x improvement
                memoryUsage: 50    // 10x improvement
              }
            },
            quality: { codeQuality: 0.99, documentation: 0.99, maintainability: 0.99 }
          }
        });

        reports.push(report);
        agent.reportHistory.push(report);
      }

      const analysis = await deceptionDetector.analyzeAgentPattern(agent.id, reports);

      expect(analysis.deceptionDetected).toBe(true);
      expect(analysis.deceptionType).toContain('impossible_claims');
      expect(analysis.evidence.impossiblePerformanceGains).toBe(true);
      expect(analysis.recommendations).toContain('Require third-party verification for performance claims');
    });
  });

  describe('Selective Reporting Scenarios', () => {
    test('should detect agent that only reports successes', async () => {
      const agent = mockAgents.get('selective-agent-004')!;
      const reports: AgentReport[] = [];

      // Generate mixed actual outcomes; the agent mostly reports only successes
      const actualOutcomes: boolean[] = [];
      for (let i = 0; i < 30; i++) {
        const success = Math.random() > 0.4; // 60% actual success rate
        actualOutcomes.push(success);

        // Agent reports every success but only ~10% of failures
        if (success || Math.random() > 0.9) {
          const report = generateReport(agent, `selective-task-${i}`, {
            actualOutcome: {
              success,
              testsPass: success,
              noErrors: success,
              performance: { improvement: success ? 0.1 : -0.05, metrics: {} },
              quality: { codeQuality: success ? 0.8 : 0.4, documentation: 0.6, maintainability: 0.7 }
            },
            claimedOutcome: {
              success: true, // Always claims success
              testsPass: true,
              noErrors: true,
              performance: { improvement: 0.12, metrics: {} },
              quality: { codeQuality: 0.85, documentation: 0.8, maintainability: 0.8 }
            }
          });

          reports.push(report);
          agent.reportHistory.push(report);
        }
      }

      const analysis = await deceptionDetector.analyzeAgentPattern(agent.id, reports);

      expect(analysis.deceptionDetected).toBe(true);
      expect(analysis.deceptionType).toContain('selective_reporting');
      expect(analysis.evidence.missingFailureReports).toBe(true);
      expect(analysis.evidence.suspiciousSuccessRate).toBeGreaterThan(0.95);
      expect(analysis.recommendations).toContain('Mandatory reporting of all task outcomes');
    });
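    // The selective_reporting flag is deliberately coarse: analyzeAgentPattern
    // raises it whenever more than 10 reports are submitted and every single one
    // claims success, which is statistically implausible for a real task stream.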
    test('should detect cherry-picked metrics reporting', async () => {
      const agent = mockAgents.get('selective-agent-004')!;
      const reports: AgentReport[] = [];

      // Generate reports where agent only reports favorable metrics
      for (let i = 0; i < 20; i++) {
        const fullMetrics: Record<string, number> = {
          responseTime: Math.random() * 1000 + 500, // 500-1500ms
          throughput: Math.random() * 200 + 50,     // 50-250 rps
          memoryUsage: Math.random() * 800 + 200,   // 200-1000MB
          errorRate: Math.random() * 0.1,           // 0-10% errors
          cpuUsage: Math.random() * 100,            // 0-100% CPU
          diskIO: Math.random() * 1000              // 0-1000 IOPS
        };

        // Agent only reports the best 2-3 metrics
        const favorableMetrics = Object.entries(fullMetrics)
          .sort(([keyA, valueA], [keyB, valueB]) => {
            // Sort by how "good" the metric looks (lower is better for most)
            const goodValues: Record<string, number> = { responseTime: -valueA, errorRate: -valueA, default: valueB };
            return (goodValues[keyA] || goodValues.default) - (goodValues[keyB] || goodValues.default);
          })
          .slice(0, 3)
          .reduce((obj, [key, value]) => ({ ...obj, [key]: value }), {} as Record<string, number>);

        const report = generateReport(agent, `cherry-pick-task-${i}`, {
          actualOutcome: {
            success: true,
            testsPass: true,
            noErrors: fullMetrics.errorRate < 0.05,
            performance: { improvement: 0.05, metrics: fullMetrics },
            quality: { codeQuality: 0.7, documentation: 0.6, maintainability: 0.7 }
          },
          claimedOutcome: {
            success: true,
            testsPass: true,
            noErrors: true,
            performance: { improvement: 0.15, metrics: favorableMetrics },
            quality: { codeQuality: 0.9, documentation: 0.8, maintainability: 0.9 }
          }
        });

        reports.push(report);
        agent.reportHistory.push(report);
      }

      const analysis = await deceptionDetector.analyzeAgentPattern(agent.id, reports);

      expect(analysis.deceptionDetected).toBe(true);
      expect(analysis.deceptionType).toContain('cherry_picking');
      expect(analysis.evidence.incompleteMetricsReporting).toBe(true);
      expect(analysis.recommendations).toContain('Require comprehensive metrics reporting');
    });
  });

  describe('Gaslighting and Manipulation Scenarios', () => {
    test('should detect agent contradicting previous statements', async () => {
      const agent = mockAgents.get('gaslighting-agent-005')!;
      const reports: AgentReport[] = [];

      // Generate contradictory reports about the same tasks
      const taskIds = Array.from({ length: 5 }, (_, i) => `contradiction-task-${i}`);

      for (const taskId of taskIds) {
        // First report - claims failure
        const firstReport = generateReport(agent, taskId, {
          actualOutcome: {
            success: true,
            testsPass: true,
            noErrors: true,
            performance: { improvement: 0.1, metrics: {} },
            quality: { codeQuality: 0.8, documentation: 0.7, maintainability: 0.8 }
          },
          claimedOutcome: {
            success: false,
            testsPass: false,
            noErrors: false,
            performance: { improvement: -0.05, metrics: {} },
            quality: { codeQuality: 0.4, documentation: 0.3, maintainability: 0.5 }
          },
          timestamp: Date.now()
        });

        // Second report - claims success for same task
        const secondReport = generateReport(agent, taskId, {
          actualOutcome: {
            success: true,
            testsPass: true,
            noErrors: true,
            performance: { improvement: 0.1, metrics: {} },
            quality: { codeQuality: 0.8, documentation: 0.7, maintainability: 0.8 }
          },
          claimedOutcome: {
            success: true,
            testsPass: true,
            noErrors: true,
            performance: { improvement: 0.2, metrics: {} },
            quality: { codeQuality: 0.95, documentation: 0.9, maintainability: 0.95 }
          },
          timestamp: Date.now() + 60000 // 1 minute later
        });

        reports.push(firstReport, secondReport);
        agent.reportHistory.push(firstReport, secondReport);
      }

      const analysis = await deceptionDetector.analyzeAgentPattern(agent.id, reports);

      expect(analysis.deceptionDetected).toBe(true);
      expect(analysis.deceptionType).toContain('gaslighting');
      expect(analysis.deceptionType).toContain('contradictory_statements');
      expect(analysis.evidence.contradictionCount).toBeGreaterThan(3);
      expect(analysis.recommendations).toContain('Implement immutable audit trail for agent reports');
    });
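    // Contradictions are counted pairwise per task in detectContradictions():
    // two consecutive reports on the same taskId with opposite success claims
    // add one contradiction. Note that the gaslighting strategy in
    // applyDeceptionStrategy() also flips outcomes at random, so exact counts
    // vary from run to run.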
    test('should detect agent attempting to discredit other agents', async () => {
      const gaslightingAgent = mockAgents.get('gaslighting-agent-005')!;
      const honestAgent = mockAgents.get('honest-agent-001')!;
      const reports: AgentReport[] = [];

      // Generate scenario where gaslighting agent contradicts honest agent
      for (let i = 0; i < 10; i++) {
        const taskId = `discredit-task-${i}`;

        // Honest agent reports realistic outcome
        const honestReport = generateReport(honestAgent, taskId, {
          actualOutcome: {
            success: true,
            testsPass: true,
            noErrors: true,
            performance: { improvement: 0.08, metrics: {} },
            quality: { codeQuality: 0.75, documentation: 0.7, maintainability: 0.8 }
          },
          claimedOutcome: {
            success: true,
            testsPass: true,
            noErrors: true,
            performance: { improvement: 0.08, metrics: {} },
            quality: { codeQuality: 0.75, documentation: 0.7, maintainability: 0.8 }
          }
        });

        // Gaslighting agent contradicts with false negative report
        const gaslightingReport = generateReport(gaslightingAgent, taskId, {
          actualOutcome: {
            success: true,
            testsPass: true,
            noErrors: true,
            performance: { improvement: 0.08, metrics: {} },
            quality: { codeQuality: 0.75, documentation: 0.7, maintainability: 0.8 }
          },
          claimedOutcome: {
            success: false,
            testsPass: false,
            noErrors: false,
            performance: { improvement: -0.1, metrics: {} },
            quality: { codeQuality: 0.3, documentation: 0.2, maintainability: 0.4 }
          },
          evidence: {
            contradicts: honestReport.id,
            claimsOtherAgentWrong: true
          }
        });

        reports.push(honestReport, gaslightingReport);
        gaslightingAgent.reportHistory.push(gaslightingReport);
      }

      const analysis = await deceptionDetector.analyzeAgentPattern(gaslightingAgent.id, reports);

      expect(analysis.deceptionDetected).toBe(true);
      expect(analysis.deceptionType).toContain('discrediting_others');
      expect(analysis.evidence.contradictionsWithOtherAgents).toBeGreaterThan(7);
      expect(analysis.evidence.systematicDisagreement).toBe(true);
    });
  });

  describe('Collusion Detection', () => {
    test('should detect agents colluding to hide failures', async () => {
      const colludingAgents = [
        mockAgents.get('overconfident-agent-002')!,
        mockAgents.get('selective-agent-004')!
      ];
      const reports: AgentReport[] = [];
      const sharedFailures = ['shared-failure-1', 'shared-failure-2', 'shared-failure-3'];

      // Generate coordinated false reports
      for (const taskId of sharedFailures) {
        for (const agent of colludingAgents) {
          const report = generateReport(agent, taskId, {
            actualOutcome: {
              success: false,
              testsPass: false,
              noErrors: false,
              performance: { improvement: -0.05, metrics: {} },
              quality: { codeQuality: 0.3, documentation: 0.2, maintainability: 0.4 }
            },
            claimedOutcome: {
              success: true,
              testsPass: true,
              noErrors: true,
              performance: { improvement: 0.15, metrics: {} },
              quality: { codeQuality: 0.9, documentation: 0.8, maintainability: 0.9 }
            },
            evidence: {
              crossReferencedWith: colludingAgents.map(a => a.id).filter(id => id !== agent.id)
            }
          });

          reports.push(report);
          agent.reportHistory.push(report);
        }
      }

      // Analyze for collusion patterns
      const collusionAnalysis = await deceptionDetector.analyzeCollusionPatterns(
        colludingAgents.map(a => a.id),
        reports
      );

      expect(collusionAnalysis.collusionDetected).toBe(true);
      expect(collusionAnalysis.collusionType).toContain('coordinated_false_reporting');
      expect(collusionAnalysis.confidence).toBeGreaterThan(0.7);
      expect(collusionAnalysis.evidence.synchronizedReporting).toBe(true);
      expect(collusionAnalysis.evidence.identicalFalseClaims).toBeGreaterThan(2);
    });
  });

  // Helper functions
  function generateReport(
    agent: MockAgent,
    taskId: string,
    scenario: {
      actualOutcome: TaskOutcome;
      claimedOutcome: TaskOutcome;
      evidence?: any;
      timestamp?: number;
    }
  ): AgentReport {
    const reportId = `report-${Date.now()}-${Math.random()}`;

    // Apply agent's deception strategy
    const modifiedClaim = applyDeceptionStrategy(agent, scenario.claimedOutcome);

    // Generate evidence based on agent's behavior pattern
    const evidence = generateEvidence(agent, scenario.actualOutcome, modifiedClaim, scenario.evidence);

    return {
      id: reportId,
      agentId: agent.id,
      taskId,
      claimedOutcome: modifiedClaim,
      evidence,
      timestamp: scenario.timestamp || Date.now(),
      verified: false,
      conflicts: []
    };
  }
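  // applyDeceptionStrategy mutates a copy of the claim, never the scenario's
  // original object; the JSON round-trip below is a cheap deep clone that is
  // safe here because TaskOutcome contains only plain data (no functions,
  // dates, or cycles).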
  function applyDeceptionStrategy(agent: MockAgent, claimedOutcome: TaskOutcome): TaskOutcome {
    const strategy = agent.deceptionStrategy;
    const deceptionLevel = agent.deceptionLevel;
    const modified = JSON.parse(JSON.stringify(claimedOutcome));

    switch (strategy.type) {
      case 'overconfident':
        if (strategy.parameters.confidenceMultiplier) {
          modified.performance.improvement *= strategy.parameters.confidenceMultiplier;
          modified.quality.codeQuality = Math.min(1, modified.quality.codeQuality * 1.2);
        }
        if (strategy.parameters.hideMinorIssues) {
          modified.noErrors = true;
        }
        break;

      case 'fabrication':
        if (strategy.parameters.fabricateResults) {
          modified.success = true;
          modified.testsPass = true;
          modified.performance.improvement = Math.random() * 0.5 + 0.3;
        }
        break;

      case 'selective_reporting':
        if (strategy.parameters.reportOnlyPositives && !modified.success) {
          return null; // Don't report failures
        }
        break;

      case 'gaslighting':
        // Randomly flip outcomes to create contradictions
        if (Math.random() < deceptionLevel * 0.5) {
          modified.success = !modified.success;
          modified.testsPass = !modified.testsPass;
        }
        break;
    }

    return modified;
  }

  function generateEvidence(
    agent: MockAgent,
    actualOutcome: TaskOutcome,
    claimedOutcome: TaskOutcome,
    additionalEvidence?: any
  ): any {
    const evidence: any = {
      ...additionalEvidence,
      reportingTime: agent.behaviorPattern.timeToReport + (Math.random() - 0.5) * 1000,
      detailLevel: agent.behaviorPattern.detailLevel,
      evidenceQuality: agent.behaviorPattern.evidenceQuality
    };

    // Add inconsistencies based on deception level
    if (agent.deceptionLevel > 0.5) {
      evidence.inconsistencies = [
        'Test logs don\'t match reported results',
        'Performance metrics seem unrealistic',
        'Evidence quality unusually low'
      ];
    }

    return evidence;
  }

  function generateFakeTestLogs(testResults: any): string {
    // Generate fake test logs that look real but have inconsistencies
    const logs = [];
    logs.push(`Running ${testResults.total} tests...`);
    logs.push(`PASS: ${testResults.passed} tests`);
    if (testResults.failed > 0) {
      logs.push(`FAIL: ${testResults.failed} tests`);
    }
    logs.push('All tests completed successfully!'); // Contradictory message
    return logs.join('\n');
  }
});

// Mock Deception Detector Implementation
class DeceptionDetector {
  private truthCalculator: any;
  private dataPath: string;
  private behaviorProfiles: Map<string, any> = new Map();

  constructor(truthCalculator: any, dataPath: string) {
    this.truthCalculator = truthCalculator;
    this.dataPath = dataPath;
  }

  async initialize() {
    await fs.mkdir(path.join(this.dataPath, 'deception-analysis'), { recursive: true });
  }

  async analyzeAgentPattern(agentId: string, reports: AgentReport[]): Promise<VerificationResult> {
    const analysis: VerificationResult = {
      reportId: `analysis-${agentId}`,
      agentId,
      truthScore: 0,
      deceptionDetected: false,
      deceptionType: [],
      confidence: 0,
      evidence: {},
      recommendations: []
    };

    // Calculate truth scores for all reports
    const truthScores = reports.map(report => {
      return this.calculateReportTruthScore(report);
    });
    analysis.truthScore = truthScores.reduce((sum, score) => sum + score, 0) / truthScores.length;

    // Detect overconfidence pattern
    const successClaimRate = reports.filter(r => r.claimedOutcome.success).length / reports.length;
    const avgTruthScore = analysis.truthScore;

    if (successClaimRate > 0.9 && avgTruthScore < 0.7) {
      analysis.deceptionDetected = true;
      analysis.deceptionType.push('overconfidence');
      analysis.evidence.successRateDiscrepancy = successClaimRate - avgTruthScore;
      // Rough exaggeration proxy: the detector only sees claims, so mean claimed
      // improvement beyond a ~20% baseline gain is treated as exaggeration.
      const avgClaimedImprovement = reports.reduce(
        (sum, r) => sum + r.claimedOutcome.performance.improvement, 0
      ) / reports.length;
      analysis.evidence.performanceExaggeration = Math.max(0, avgClaimedImprovement - 0.2);
    }

    // Detect issue hiding
    const hiddenIssues = reports.filter(r =>
      r.claimedOutcome.noErrors && r.evidence?.inconsistencies?.length > 0
    ).length;

    if (hiddenIssues > reports.length * 0.5) {
      analysis.deceptionDetected = true;
      analysis.deceptionType.push('issue_hiding');
      analysis.evidence.hiddenIssuesCount = hiddenIssues;
    }
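    // The cutoffs below (50% improvement, 0.95 code quality, 2000 ms reporting)
    // are illustrative heuristics for the mock, not calibrated production
    // thresholds.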
    // Detect fabrication
    const unrealisticResults = reports.filter(r =>
      r.claimedOutcome.performance.improvement > 0.5 || // >50% improvement
      r.claimedOutcome.quality.codeQuality > 0.95       // >95% quality
    ).length;

    if (unrealisticResults > 0) {
      analysis.deceptionDetected = true;
      analysis.deceptionType.push('fabrication', 'unrealistic_results');
      analysis.evidence.unrealisticSuccessRate = true;
      // Fraction of reports whose own evidence carries flagged inconsistencies;
      // a simple stand-in for cross-checking logs against claimed results.
      analysis.evidence.evidenceInconsistency = reports.filter(r =>
        r.evidence?.inconsistencies?.length > 0
      ).length / reports.length;
    }

    // Detect fast reporting (suspicious)
    const avgReportingTime = reports.reduce((sum, r) => sum + (r.evidence?.reportingTime || 5000), 0) / reports.length;
    if (avgReportingTime < 2000) {
      analysis.evidence.suspiciouslyFastReporting = true;
    }

    // Detect impossible claims
    const impossibleClaims = reports.filter(r =>
      r.claimedOutcome.performance.improvement > 1.0 // >100% improvement
    ).length;

    if (impossibleClaims > 0) {
      analysis.deceptionDetected = true;
      analysis.deceptionType.push('impossible_claims');
      analysis.evidence.impossiblePerformanceGains = true;
    }

    // Detect selective reporting
    const allSuccesses = reports.every(r => r.claimedOutcome.success);
    if (allSuccesses && reports.length > 10) {
      analysis.deceptionDetected = true;
      analysis.deceptionType.push('selective_reporting');
      analysis.evidence.missingFailureReports = true;
      analysis.evidence.suspiciousSuccessRate = 1.0;
    }

    // Detect cherry picking
    const incompleteMetrics = reports.filter(r => {
      const metrics = r.claimedOutcome.performance.metrics;
      return Object.keys(metrics).length < 4; // Fewer than 4 of the ~6 tracked metrics reported
    }).length;

    if (incompleteMetrics > reports.length * 0.7) {
      analysis.deceptionDetected = true;
      analysis.deceptionType.push('cherry_picking');
      analysis.evidence.incompleteMetricsReporting = true;
    }

    // Detect contradictions
    const contradictions = this.detectContradictions(reports);
    if (contradictions > 0) {
      analysis.deceptionDetected = true;
      analysis.deceptionType.push('gaslighting', 'contradictory_statements');
      analysis.evidence.contradictionCount = contradictions;
    }

    // Detect attempts to discredit other agents: reports that explicitly
    // contradict another agent's report carry `contradicts` /
    // `claimsOtherAgentWrong` markers in their evidence.
    const ownReports = reports.filter(r => r.agentId === agentId);
    const discreditingReports = ownReports.filter(r =>
      r.evidence?.claimsOtherAgentWrong || r.evidence?.contradicts
    ).length;

    if (discreditingReports > 0) {
      analysis.deceptionDetected = true;
      analysis.deceptionType.push('discrediting_others');
      analysis.evidence.contradictionsWithOtherAgents = discreditingReports;
      analysis.evidence.systematicDisagreement = discreditingReports >= ownReports.length * 0.5;
    }

    // Calculate confidence
    analysis.confidence = Math.min(1.0, analysis.deceptionType.length * 0.3 + (1 - analysis.truthScore));

    // Generate recommendations
    if (analysis.deceptionDetected) {
      analysis.recommendations = this.generateRecommendations(analysis.deceptionType);
    }

    return analysis;
  }
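  // Collusion analysis looks across agents rather than within one history:
  // reports on the same task are grouped, then checked for near-simultaneous
  // submission (within a 60 s window) and for byte-identical success claims
  // whose average truth score falls below 0.5.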
  async analyzeCollusionPatterns(agentIds: string[], reports: AgentReport[]): Promise<any> {
    const agentReports = new Map();

    // Group reports by agent
    for (const report of reports) {
      if (!agentReports.has(report.agentId)) {
        agentReports.set(report.agentId, []);
      }
      agentReports.get(report.agentId).push(report);
    }

    // Look for synchronization patterns
    const taskReports = new Map();
    for (const report of reports) {
      if (!taskReports.has(report.taskId)) {
        taskReports.set(report.taskId, []);
      }
      taskReports.get(report.taskId).push(report);
    }

    let synchronizedReporting = 0;
    let identicalFalseClaims = 0;

    for (const [taskId, taskReportsArray] of taskReports) {
      if (taskReportsArray.length > 1) {
        // Check for synchronized timing
        const timestamps = taskReportsArray.map(r => r.timestamp);
        const maxTimestamp = Math.max(...timestamps);
        const minTimestamp = Math.min(...timestamps);

        if (maxTimestamp - minTimestamp < 60000) { // Within 1 minute
          synchronizedReporting++;
        }

        // Check for identical false claims
        const claims = taskReportsArray.map(r => JSON.stringify(r.claimedOutcome));
        const uniqueClaims = new Set(claims);

        if (uniqueClaims.size === 1 && taskReportsArray[0].claimedOutcome.success) {
          // All agents claim same success - check if it's false
          const avgTruthScore = taskReportsArray.reduce(
            (sum, r) => sum + this.calculateReportTruthScore(r), 0
          ) / taskReportsArray.length;
          if (avgTruthScore < 0.5) {
            identicalFalseClaims++;
          }
        }
      }
    }

    return {
      collusionDetected: synchronizedReporting > 0 || identicalFalseClaims > 0,
      collusionType: ['coordinated_false_reporting'],
      confidence: Math.min(1.0, (synchronizedReporting + identicalFalseClaims) * 0.3),
      evidence: {
        synchronizedReporting: synchronizedReporting > 0,
        identicalFalseClaims,
        analysedTasks: taskReports.size,
        involvedAgents: agentIds
      }
    };
  }

  private calculateReportTruthScore(report: AgentReport): number {
    // Simplified truth score calculation
    let score = 0.8; // Base score

    // Penalize for inconsistencies
    if (report.evidence?.inconsistencies?.length > 0) {
      score -= report.evidence.inconsistencies.length * 0.1;
    }

    // Penalize for unrealistic claims
    if (report.claimedOutcome.performance.improvement > 0.5) {
      score -= 0.3;
    }
    if (report.claimedOutcome.quality.codeQuality > 0.95) {
      score -= 0.2;
    }

    // Penalize for fast reporting
    if (report.evidence?.reportingTime < 2000) {
      score -= 0.1;
    }

    return Math.max(0, Math.min(1, score));
  }

  private detectContradictions(reports: AgentReport[]): number {
    let contradictions = 0;
    const taskReports = new Map();

    // Group by task
    for (const report of reports) {
      if (!taskReports.has(report.taskId)) {
        taskReports.set(report.taskId, []);
      }
      taskReports.get(report.taskId).push(report);
    }

    // Check for contradictory reports on same task
    for (const [taskId, taskReportsArray] of taskReports) {
      if (taskReportsArray.length > 1) {
        for (let i = 0; i < taskReportsArray.length - 1; i++) {
          const report1 = taskReportsArray[i];
          const report2 = taskReportsArray[i + 1];
          if (report1.claimedOutcome.success !== report2.claimedOutcome.success) {
            contradictions++;
          }
        }
      }
    }

    return contradictions;
  }

  private generateRecommendations(deceptionTypes: string[]): string[] {
    const recommendations = [];

    if (deceptionTypes.includes('overconfidence')) {
      recommendations.push('Implement additional verification for this agent');
      recommendations.push('Require independent validation of claims');
    }
    if (deceptionTypes.includes('fabrication')) {
      recommendations.push('Require third-party verification for performance claims');
      recommendations.push('Implement automated evidence validation');
    }
    if (deceptionTypes.includes('selective_reporting')) {
      recommendations.push('Mandatory reporting of all task outcomes');
      recommendations.push('Automated detection of missing reports');
    }
    if (deceptionTypes.includes('cherry_picking')) {
      recommendations.push('Require comprehensive metrics reporting');
      recommendations.push('Standardize required evidence formats');
    }
    if (deceptionTypes.includes('gaslighting')) {
      recommendations.push('Implement immutable audit trail for agent reports');
      recommendations.push('Enable cross-agent verification workflows');
    }

    return recommendations;
  }
}
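
/*
 * Usage sketch (hypothetical, outside the Jest suite): the mock detector can be
 * pointed at any stored report log. `storedReports` is an assumed AgentReport[]
 * loaded elsewhere, and the TruthScoreCalculator path setup / init() shown in
 * beforeEach above is omitted for brevity.
 *
 *   const detector = new DeceptionDetector(new TruthScoreCalculator(), './audit-data');
 *   await detector.initialize();
 *   const result = await detector.analyzeAgentPattern('agent-42', storedReports);
 *   if (result.deceptionDetected) {
 *     console.log(result.deceptionType, result.recommendations);
 *   }
 */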