devflow-ai
Version:
Enterprise-grade AI agent orchestration with swarm management UI dashboard
730 lines (600 loc) • 24.4 kB
text/typescript
/**
* Integration Tests for Cross-Agent Communication Verification
*
* Tests the verification of communication patterns between agents including:
* - Message integrity verification
* - Communication protocol validation
* - Agent response verification
* - Cross-verification between agents
*/
import { jest } from '@jest/globals';
import { EventEmitter } from 'events';
import * as fs from 'fs/promises';
import * as path from 'path';
import * as os from 'os';
// Mock agent types for testing
interface MockAgent {
id: string;
type: string;
capabilities: string[];
messageHistory: AgentMessage[];
verificationResults: VerificationResult[];
truthScore: number;
emit: (event: string, data: any) => void;
on: (event: string, callback: (data: any) => void) => void;
}
interface AgentMessage {
id: string;
from: string;
to: string;
type: 'task' | 'result' | 'verification' | 'status';
content: any;
timestamp: number;
hash: string;
}
interface VerificationResult {
messageId: string;
verifiedBy: string;
truthScore: number;
evidence: any;
timestamp: number;
conflicts: string[];
}
interface CrossVerificationReport {
totalMessages: number;
verifiedMessages: number;
conflictingClaims: number;
avgTruthScore: number;
agentReliability: Map<string, number>;
communicationIntegrity: number;
}
describe('Cross-Agent Communication Verification', () => {
let communicationBus: EventEmitter;
let mockAgents: Map<string, MockAgent>;
let verificationSystem: CrossAgentVerificationSystem;
let tempDir: string;
beforeEach(async () => {
// Setup test environment
tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'cross-agent-test-'));
communicationBus = new EventEmitter();
mockAgents = new Map();
verificationSystem = new CrossAgentVerificationSystem(communicationBus, tempDir);
// Create mock agents
await createMockAgents();
// Initialize verification system
await verificationSystem.initialize();
});
afterEach(async () => {
await fs.rm(tempDir, { recursive: true, force: true });
communicationBus.removeAllListeners();
});
async function createMockAgents() {
const agentConfigs = [
{ id: 'coder-001', type: 'coder', capabilities: ['implement', 'test', 'debug'] },
{ id: 'reviewer-001', type: 'reviewer', capabilities: ['review', 'verify', 'validate'] },
{ id: 'tester-001', type: 'tester', capabilities: ['test', 'benchmark', 'validate'] },
{ id: 'coordinator-001', type: 'coordinator', capabilities: ['orchestrate', 'monitor', 'report'] }
];
for (const config of agentConfigs) {
const agent = createMockAgent(config);
mockAgents.set(config.id, agent);
// Register agent with communication bus
communicationBus.on(`message:${config.id}`, (message: AgentMessage) => {
agent.messageHistory.push(message);
agent.emit('message:received', message);
});
}
}
function createMockAgent(config: any): MockAgent {
const agent = new EventEmitter() as any;
Object.assign(agent, {
id: config.id,
type: config.type,
capabilities: config.capabilities,
messageHistory: [],
verificationResults: [],
truthScore: 1.0,
sendMessage(to: string, type: string, content: any) {
const message: AgentMessage = {
id: `msg-${Date.now()}-${Math.random()}`,
from: this.id,
to,
type,
content,
timestamp: Date.now(),
hash: generateMessageHash(content)
};
communicationBus.emit(`message:${to}`, message);
communicationBus.emit('message:sent', message);
return message;
},
verifyMessage(message: AgentMessage, evidence: any): VerificationResult {
const result: VerificationResult = {
messageId: message.id,
verifiedBy: this.id,
truthScore: calculateTruthScore(message, evidence),
evidence,
timestamp: Date.now(),
conflicts: detectConflicts(message, evidence)
};
this.verificationResults.push(result);
communicationBus.emit('verification:complete', result);
return result;
}
});
return agent;
}
function generateMessageHash(content: any): string {
return `hash-${JSON.stringify(content).length}-${Date.now()}`;
}
function calculateTruthScore(message: AgentMessage, evidence: any): number {
// Simplified truth score calculation for testing
const baseScore = 0.8;
const evidenceQuality = evidence?.quality || 0.5;
const messageIntegrity = message.hash ? 0.2 : 0;
return Math.min(1.0, baseScore + evidenceQuality * 0.15 + messageIntegrity);
}
function detectConflicts(message: AgentMessage, evidence: any): string[] {
const conflicts: string[] = [];
// Simulate conflict detection
if (message.content?.claimed_success && evidence?.actual_success === false) {
conflicts.push('Claimed success but evidence shows failure');
}
if (message.content?.test_count && evidence?.actual_test_count !== message.content.test_count) {
conflicts.push(`Test count mismatch: claimed ${message.content.test_count}, actual ${evidence.actual_test_count}`);
}
return conflicts;
}
describe('Message Integrity Verification', () => {
test('should verify message integrity through hash validation', async () => {
const coder = mockAgents.get('coder-001')!;
const reviewer = mockAgents.get('reviewer-001')!;
// Coder sends implementation result
const message = coder.sendMessage(reviewer.id, 'result', {
task: 'implement-auth',
status: 'completed',
files_created: 5,
tests_added: 12,
claimed_success: true
});
// Wait for message to be delivered
await new Promise(resolve => setTimeout(resolve, 10));
// Verify message was received with correct hash
expect(reviewer.messageHistory).toHaveLength(1);
expect(reviewer.messageHistory[0].hash).toBe(message.hash);
expect(reviewer.messageHistory[0].content).toEqual(message.content);
});
test('should detect message tampering through hash mismatch', async () => {
const coder = mockAgents.get('coder-001')!;
const reviewer = mockAgents.get('reviewer-001')!;
const message = coder.sendMessage(reviewer.id, 'result', {
task: 'implement-feature',
status: 'completed'
});
// Simulate message tampering
const tamperedMessage = { ...message };
tamperedMessage.content.status = 'failed';
// Hash remains the same, creating mismatch
const expectedHash = generateMessageHash(tamperedMessage.content);
expect(expectedHash).not.toBe(tamperedMessage.hash);
});
test('should track message delivery and acknowledgment', async () => {
const coordinator = mockAgents.get('coordinator-001')!;
const tester = mockAgents.get('tester-001')!;
let messageDelivered = false;
let acknowledgmentReceived = false;
communicationBus.on('message:sent', (message) => {
if (message.from === coordinator.id && message.to === tester.id) {
messageDelivered = true;
}
});
// Tester acknowledges receipt
tester.on('message:received', (message) => {
tester.sendMessage(message.from, 'acknowledgment', {
messageId: message.id,
received: true
});
acknowledgmentReceived = true;
});
// Send message
coordinator.sendMessage(tester.id, 'task', {
task: 'run-performance-tests',
timeout: 300000
});
await new Promise(resolve => setTimeout(resolve, 20));
expect(messageDelivered).toBe(true);
expect(acknowledgmentReceived).toBe(true);
});
});
describe('Communication Protocol Validation', () => {
test('should validate message format and required fields', async () => {
const coder = mockAgents.get('coder-001')!;
const reviewer = mockAgents.get('reviewer-001')!;
const validMessage = coder.sendMessage(reviewer.id, 'result', {
task: 'implement-api',
status: 'completed',
timestamp: Date.now(),
metadata: {
files_changed: 3,
lines_added: 150,
tests_added: 8
}
});
// Validate message structure
expect(validMessage.id).toBeDefined();
expect(validMessage.from).toBe(coder.id);
expect(validMessage.to).toBe(reviewer.id);
expect(validMessage.type).toBe('result');
expect(validMessage.content).toBeDefined();
expect(validMessage.timestamp).toBeGreaterThan(0);
expect(validMessage.hash).toBeDefined();
});
test('should validate agent capability permissions', async () => {
const agents = Array.from(mockAgents.values());
// Test that each agent only performs actions within their capabilities
for (const agent of agents) {
if (agent.capabilities.includes('implement')) {
expect(() => agent.sendMessage('test', 'result', { code: 'implementation' }))
.not.toThrow();
}
if (agent.capabilities.includes('review')) {
expect(() => agent.sendMessage('test', 'verification', { approved: true }))
.not.toThrow();
}
if (agent.capabilities.includes('test')) {
expect(() => agent.sendMessage('test', 'result', { tests_passed: 10 }))
.not.toThrow();
}
}
});
test('should enforce message routing rules', async () => {
const coder = mockAgents.get('coder-001')!;
const tester = mockAgents.get('tester-001')!;
const coordinator = mockAgents.get('coordinator-001')!;
// Implementation results should go to reviewer first
const implementationMessage = coder.sendMessage('reviewer-001', 'result', {
implementation: 'completed'
});
// Test results should go to coordinator
const testMessage = tester.sendMessage(coordinator.id, 'result', {
tests_passed: 15,
tests_failed: 2
});
await new Promise(resolve => setTimeout(resolve, 10));
// Verify routing
const reviewer = mockAgents.get('reviewer-001')!;
expect(reviewer.messageHistory).toHaveLength(1);
expect(reviewer.messageHistory[0].type).toBe('result');
expect(coordinator.messageHistory).toHaveLength(1);
expect(coordinator.messageHistory[0].from).toBe(tester.id);
});
});
describe('Agent Response Verification', () => {
test('should verify agent claims against actual evidence', async () => {
const coder = mockAgents.get('coder-001')!;
const reviewer = mockAgents.get('reviewer-001')!;
// Coder claims task completion
const claimMessage = coder.sendMessage(reviewer.id, 'result', {
task: 'implement-user-auth',
status: 'completed',
claimed_success: true,
files_created: 4,
tests_added: 10
});
// Reviewer verifies with evidence
const evidence = {
actual_success: true,
files_found: 4,
tests_found: 10,
quality: 0.9,
lint_errors: 0,
test_coverage: 0.85
};
const verification = reviewer.verifyMessage(claimMessage, evidence);
expect(verification.truthScore).toBeGreaterThan(0.8);
expect(verification.conflicts).toHaveLength(0);
expect(verification.verifiedBy).toBe(reviewer.id);
});
test('should detect false claims in agent responses', async () => {
const coder = mockAgents.get('coder-001')!;
const reviewer = mockAgents.get('reviewer-001')!;
// Coder makes false claims
const falseClaimMessage = coder.sendMessage(reviewer.id, 'result', {
task: 'fix-bugs',
status: 'completed',
claimed_success: true,
bugs_fixed: 5,
test_count: 20
});
// Evidence contradicts claims
const evidence = {
actual_success: false,
bugs_remaining: 3,
actual_test_count: 15,
quality: 0.3,
build_success: false
};
const verification = reviewer.verifyMessage(falseClaimMessage, evidence);
expect(verification.truthScore).toBeLessThan(0.5);
expect(verification.conflicts.length).toBeGreaterThan(0);
expect(verification.conflicts).toContain('Claimed success but evidence shows failure');
expect(verification.conflicts).toContain('Test count mismatch: claimed 20, actual 15');
});
test('should track agent reliability over time', async () => {
const coder = mockAgents.get('coder-001')!;
const reviewer = mockAgents.get('reviewer-001')!;
// Send multiple messages with varying truth scores
const scenarios = [
{ claimed: true, actual: true, score: 0.95 },
{ claimed: true, actual: false, score: 0.2 },
{ claimed: true, actual: true, score: 0.9 },
{ claimed: false, actual: false, score: 1.0 },
{ claimed: true, actual: true, score: 0.85 }
];
for (let i = 0; i < scenarios.length; i++) {
const scenario = scenarios[i];
const message = coder.sendMessage(reviewer.id, 'result', {
task: `task-${i}`,
claimed_success: scenario.claimed
});
const evidence = {
actual_success: scenario.actual,
quality: scenario.score
};
reviewer.verifyMessage(message, evidence);
}
// Calculate reliability
const verifications = reviewer.verificationResults;
const avgTruthScore = verifications.reduce((sum, v) => sum + v.truthScore, 0) / verifications.length;
expect(verifications).toHaveLength(5);
expect(avgTruthScore).toBeGreaterThan(0.6);
expect(avgTruthScore).toBeLessThan(0.9);
});
});
describe('Cross-Verification Between Agents', () => {
test('should enable multiple agents to verify same claim', async () => {
const coder = mockAgents.get('coder-001')!;
const reviewer = mockAgents.get('reviewer-001')!;
const tester = mockAgents.get('tester-001')!;
// Coder claims implementation is complete
const claimMessage = coder.sendMessage(reviewer.id, 'result', {
task: 'implement-payment-flow',
status: 'completed',
claimed_success: true
});
// Send copy to tester for independent verification
const testMessage = { ...claimMessage, to: tester.id };
communicationBus.emit(`message:${tester.id}`, testMessage);
await new Promise(resolve => setTimeout(resolve, 10));
// Both agents verify independently
const reviewerEvidence = {
actual_success: true,
code_quality: 0.9,
standards_compliance: true
};
const testerEvidence = {
actual_success: true,
tests_pass: true,
performance_acceptable: true
};
const reviewerVerification = reviewer.verifyMessage(claimMessage, reviewerEvidence);
const testerVerification = tester.verifyMessage(testMessage, testerEvidence);
// Compare verification results
expect(reviewerVerification.truthScore).toBeGreaterThan(0.8);
expect(testerVerification.truthScore).toBeGreaterThan(0.8);
const scoreDifference = Math.abs(reviewerVerification.truthScore - testerVerification.truthScore);
expect(scoreDifference).toBeLessThan(0.2); // Should be reasonably consistent
});
test('should detect conflicting verifications between agents', async () => {
const coder = mockAgents.get('coder-001')!;
const reviewer = mockAgents.get('reviewer-001')!;
const tester = mockAgents.get('tester-001')!;
const claimMessage = coder.sendMessage(reviewer.id, 'result', {
task: 'optimize-performance',
claimed_success: true,
performance_improvement: '50%'
});
// Send to tester
const testMessage = { ...claimMessage, to: tester.id };
communicationBus.emit(`message:${tester.id}`, testMessage);
await new Promise(resolve => setTimeout(resolve, 10));
// Conflicting evidence
const reviewerEvidence = {
actual_success: true,
code_looks_good: true,
quality: 0.8
};
const testerEvidence = {
actual_success: false,
performance_worse: true,
quality: 0.2
};
const reviewerVerification = reviewer.verifyMessage(claimMessage, reviewerEvidence);
const testerVerification = tester.verifyMessage(testMessage, testerEvidence);
// Detect conflict
const scoreDifference = Math.abs(reviewerVerification.truthScore - testerVerification.truthScore);
expect(scoreDifference).toBeGreaterThan(0.5); // Significant disagreement
expect(reviewerVerification.conflicts).toHaveLength(0);
expect(testerVerification.conflicts.length).toBeGreaterThan(0);
});
test('should aggregate cross-verification results', async () => {
const report = await verificationSystem.generateCrossVerificationReport();
expect(report).toBeDefined();
expect(report.totalMessages).toBeGreaterThanOrEqual(0);
expect(report.verifiedMessages).toBeGreaterThanOrEqual(0);
expect(report.conflictingClaims).toBeGreaterThanOrEqual(0);
expect(report.communicationIntegrity).toBeGreaterThanOrEqual(0);
expect(report.communicationIntegrity).toBeLessThanOrEqual(1);
});
});
describe('Communication Pattern Analysis', () => {
test('should analyze communication patterns for anomalies', async () => {
const agents = Array.from(mockAgents.values());
// Simulate normal communication pattern
for (let i = 0; i < 10; i++) {
const sender = agents[i % agents.length];
const receiver = agents[(i + 1) % agents.length];
sender.sendMessage(receiver.id, 'status', {
update: `Status update ${i}`,
timestamp: Date.now() + i * 1000
});
}
await new Promise(resolve => setTimeout(resolve, 50));
// Analyze patterns
const patterns = await verificationSystem.analyzeCommunicationPatterns();
expect(patterns.messageFrequency).toBeGreaterThan(0);
expect(patterns.averageResponseTime).toBeGreaterThan(0);
expect(patterns.communicationGraph).toBeDefined();
expect(patterns.anomalies).toBeDefined();
});
test('should detect spam or flooding attacks', async () => {
const coder = mockAgents.get('coder-001')!;
const reviewer = mockAgents.get('reviewer-001')!;
// Simulate spam attack
const spamPromises = [];
for (let i = 0; i < 100; i++) {
spamPromises.push(
Promise.resolve(coder.sendMessage(reviewer.id, 'status', {
spam: `Message ${i}`,
timestamp: Date.now()
}))
);
}
await Promise.all(spamPromises);
// Check for flood detection
const messageCount = reviewer.messageHistory.length;
expect(messageCount).toBe(100);
// Verification system should detect anomalous message volume
const anomalyReport = await verificationSystem.detectAnomalies();
expect(anomalyReport.highVolumeDetected).toBe(true);
expect(anomalyReport.suspiciousAgents).toContain(coder.id);
});
});
describe('Evidence Validation', () => {
test('should validate evidence quality and completeness', async () => {
const reviewer = mockAgents.get('reviewer-001')!;
const message: AgentMessage = {
id: 'test-msg',
from: 'coder-001',
to: reviewer.id,
type: 'result',
content: { task: 'test', claimed_success: true },
timestamp: Date.now(),
hash: 'test-hash'
};
// Test with complete evidence
const completeEvidence = {
actual_success: true,
test_results: { passed: 10, failed: 0 },
build_logs: 'SUCCESS',
performance_metrics: { response_time: 150 },
quality: 0.9
};
const completeVerification = reviewer.verifyMessage(message, completeEvidence);
expect(completeVerification.truthScore).toBeGreaterThan(0.8);
// Test with incomplete evidence
const incompleteEvidence = {
actual_success: true,
quality: 0.5
};
const incompleteVerification = reviewer.verifyMessage(message, incompleteEvidence);
expect(incompleteVerification.truthScore).toBeLessThan(completeVerification.truthScore);
});
test('should detect fabricated or inconsistent evidence', async () => {
const reviewer = mockAgents.get('reviewer-001')!;
const message: AgentMessage = {
id: 'test-msg-2',
from: 'coder-001',
to: reviewer.id,
type: 'result',
content: {
task: 'optimize-database',
claimed_success: true,
performance_improved: true
},
timestamp: Date.now(),
hash: 'test-hash-2'
};
// Inconsistent evidence
const inconsistentEvidence = {
actual_success: true,
performance_metrics: {
before: { query_time: 100 },
after: { query_time: 200 } // Actually worse!
},
quality: 0.9 // High quality score doesn't match actual results
};
const verification = reviewer.verifyMessage(message, inconsistentEvidence);
// Should detect inconsistency between claimed improvement and actual metrics
expect(verification.conflicts.length).toBeGreaterThan(0);
expect(verification.truthScore).toBeLessThan(0.6);
});
});
});
// Mock Cross-Agent Verification System
class CrossAgentVerificationSystem {
private communicationBus: EventEmitter;
private dataPath: string;
private verificationHistory: VerificationResult[] = [];
private messageHistory: AgentMessage[] = [];
constructor(communicationBus: EventEmitter, dataPath: string) {
this.communicationBus = communicationBus;
this.dataPath = dataPath;
}
async initialize() {
// Setup event listeners
this.communicationBus.on('message:sent', (message: AgentMessage) => {
this.messageHistory.push(message);
});
this.communicationBus.on('verification:complete', (result: VerificationResult) => {
this.verificationHistory.push(result);
});
// Create data directory
await fs.mkdir(this.dataPath, { recursive: true });
}
async generateCrossVerificationReport(): Promise<CrossVerificationReport> {
const totalMessages = this.messageHistory.length;
const verifiedMessages = this.verificationHistory.length;
const conflictingClaims = this.verificationHistory.filter(v => v.conflicts.length > 0).length;
const avgTruthScore = this.verificationHistory.length > 0
? this.verificationHistory.reduce((sum, v) => sum + v.truthScore, 0) / this.verificationHistory.length
: 1.0;
const agentReliability = new Map<string, number>();
const communicationIntegrity = Math.max(0, 1 - (conflictingClaims / Math.max(1, verifiedMessages)));
return {
totalMessages,
verifiedMessages,
conflictingClaims,
avgTruthScore,
agentReliability,
communicationIntegrity
};
}
async analyzeCommunicationPatterns() {
const now = Date.now();
const recentMessages = this.messageHistory.filter(m => now - m.timestamp < 60000); // Last minute
return {
messageFrequency: recentMessages.length,
averageResponseTime: 50, // Simplified
communicationGraph: new Map(),
anomalies: []
};
}
async detectAnomalies() {
const now = Date.now();
const recentMessages = this.messageHistory.filter(m => now - m.timestamp < 5000); // Last 5 seconds
const highVolumeThreshold = 50;
const messageCounts = new Map<string, number>();
recentMessages.forEach(m => {
messageCounts.set(m.from, (messageCounts.get(m.from) || 0) + 1);
});
const suspiciousAgents = Array.from(messageCounts.entries())
.filter(([agent, count]) => count > highVolumeThreshold)
.map(([agent]) => agent);
return {
highVolumeDetected: suspiciousAgents.length > 0,
suspiciousAgents,
totalRecentMessages: recentMessages.length
};
}
}