shipdeck
Version:
Ship MVPs in 48 hours. Fix bugs in 30 seconds. The command deck for developers who ship.
118 lines (99 loc) • 3.75 kB
JavaScript
/**
* Test file for LLM Judge Agent
* Quick validation of basic functionality
*/
const { LLMJudgeAgent } = require('./llm-judge');
/**
 * Smoke-test an `LLMJudgeAgent` instance without calling the AI backend.
 *
 * Runs five console-reported checks: agent metadata, required capabilities,
 * task validation (one task expected to pass, one expected to be rejected),
 * a comparison-task structure check, and the score / rating / shuffle
 * utilities. A check that does not behave as expected is recorded and the run
 * continues; an error thrown outside the guarded validation calls aborts the
 * run and is reported by the outer `catch`.
 *
 * Side effect: sets `process.exitCode = 1` when any check fails or an
 * unexpected error is thrown, so running the file with `node` reports failure.
 *
 * @returns {Promise<boolean>} `true` when every check passed, otherwise
 *   `false`. Unexpected errors are logged and reported as `false` instead of
 *   rejecting the promise.
 */
async function testLLMJudge() {
  // Status glyphs are written as Unicode escapes so the source stays pure
  // ASCII and cannot be corrupted by a wrong-encoding round trip.
  const CHECK_MARK = '\u2705'; // white heavy check mark
  const CROSS_MARK = '\u274C'; // cross mark
  const TEST_TUBE = '\u{1F9EA}'; // test tube
  const PARTY_POPPER = '\u{1F389}'; // party popper

  // Labels of the checks that did not behave as expected.
  const failures = [];

  console.log(`${TEST_TUBE} Testing LLM Judge Agent...\n`);
  try {
    // Initialize agent
    const judge = new LLMJudgeAgent({
      anthropicConfig: {
        skipValidation: true, // Skip API validation for testing
      },
    });

    // Test 1: Agent metadata
    console.log(`${CHECK_MARK} Test 1: Agent Metadata`);
    const metadata = judge.getMetadata();
    console.log('Agent Name:', metadata.name);
    console.log('Capabilities:', metadata.capabilities);
    console.log('Version:', metadata.version);
    console.log();

    // Test 2: Capabilities check
    console.log(`${CHECK_MARK} Test 2: Capabilities Check`);
    const capabilities = judge.getCapabilities();
    const expectedCapabilities = ['compare', 'evaluate', 'judge', 'rank', 'select-best'];
    const missingCapabilities = expectedCapabilities.filter(
      (cap) => !capabilities.includes(cap)
    );
    const hasRequiredCapabilities = missingCapabilities.length === 0;
    console.log('Has required capabilities:', hasRequiredCapabilities);
    console.log('All capabilities:', capabilities);
    if (!hasRequiredCapabilities) {
      failures.push(`Missing capabilities: ${missingCapabilities.join(', ')}`);
    }
    console.log();

    // Test 3: Validation
    console.log(`${CHECK_MARK} Test 3: Task Validation`);
    // Should pass
    try {
      judge.validateTask({ type: 'compare', solutions: [] });
      console.log('Valid task validation: PASSED');
    } catch (error) {
      console.log('Valid task validation: FAILED -', error.message);
      failures.push(`Valid task was rejected: ${error.message}`);
    }
    // Should fail: here a thrown error is the expected outcome.
    try {
      judge.validateTask({});
      console.log('Invalid task validation: FAILED (should have thrown)');
      failures.push('Invalid task was accepted');
    } catch (error) {
      console.log('Invalid task validation: PASSED (correctly rejected)');
    }
    console.log();

    // Test 4: Comparison task setup (without actual AI call)
    console.log(`${CHECK_MARK} Test 4: Comparison Task Structure`);
    const mockSolutions = [
      { id: 'sol1', code: 'function add(a, b) { return a + b; }' },
      { id: 'sol2', code: 'const add = (a, b) => a + b;' },
    ];
    const comparisonTask = {
      type: 'compare',
      solutions: mockSolutions,
      criteria: {
        correctness: { weight: 0.4 },
        readability: { weight: 0.6 },
      },
    };
    try {
      judge.validateTask(comparisonTask);
      console.log('Comparison task structure: VALID');
      console.log('Solutions count:', comparisonTask.solutions.length);
    } catch (error) {
      console.log('Comparison task structure: INVALID -', error.message);
      failures.push(`Comparison task was rejected: ${error.message}`);
    }
    console.log();

    // Test 5: Utility functions
    console.log(`${CHECK_MARK} Test 5: Utility Functions`);
    // Test score calculation
    const mockScores = { correctness: 8, readability: 6 };
    const mockCriteria = {
      correctness: { weight: 0.4 },
      readability: { weight: 0.6 },
    };
    const overallScore = judge.calculateOverallScore(mockScores, mockCriteria);
    console.log('Overall score calculation:', overallScore.toFixed(2));
    // Test rating conversion
    const rating = judge.scoreToRating(overallScore);
    console.log('Score to rating:', rating);
    // Test array shuffle: only the length is compared, not the contents.
    const testArray = [1, 2, 3, 4, 5];
    const shuffled = judge.shuffleArray(testArray);
    const shuffleKeptLength = shuffled.length === testArray.length;
    console.log('Array shuffle test:', shuffleKeptLength ? 'PASSED' : 'FAILED');
    if (!shuffleKeptLength) {
      failures.push('Shuffled array changed length');
    }
    console.log();

    // Only claim success when nothing was recorded as failed.
    if (failures.length > 0) {
      console.error(`${CROSS_MARK} ${failures.length} check(s) failed:`);
      failures.forEach((label) => console.error(` - ${label}`));
      process.exitCode = 1;
      return false;
    }

    console.log(`${PARTY_POPPER} All tests completed successfully!`);
    console.log('\nLLM Judge Agent is ready for use with the following capabilities:');
    capabilities.forEach((cap) => console.log(` - ${cap}`));
    return true;
  } catch (error) {
    console.error(`${CROSS_MARK} Test failed:`, error.message);
    console.error(error.stack);
    // Make the failure visible to whoever launched the process.
    process.exitCode = 1;
    return false;
  }
}
// When this file is the process entry point (`node <this file>`), start the
// run immediately. The returned promise is deliberately not awaited.
if (module === require.main) {
  void testLLMJudge();
}

// Expose the runner so other scripts can invoke it programmatically.
module.exports = { testLLMJudge };