aiwg
Version:
Cognitive architecture for AI-augmented software development with structured memory, ensemble validation, and closed-loop correction. FAIR-aligned artifacts, 84% cost reduction via human-in-the-loop, standards adopted by 100+ organizations.
519 lines (436 loc) • 15.5 kB
JavaScript
/**
* Tests for CrossTaskLearner
*
* @tests @tools/ralph-external/cross-task-learner.mjs
* @requirement @agentic/code/addons/ralph/schemas/cross-task-memory.yaml
* @issue #154
*/
import { describe, it, beforeEach, afterEach } from 'node:test';
import assert from 'node:assert/strict';
import { existsSync, rmSync } from 'fs';
import { join } from 'path';
import { CrossTaskLearner } from './cross-task-learner.mjs';
// Scratch directory (under the current working directory) handed to the learner
// as its memory_path; the suite's hooks delete it before and after every test.
const TEST_DIR = join(process.cwd(), '.test-cross-task-memory');
describe('CrossTaskLearner', () => {
let learner;
// Give every test an empty memory directory and a freshly configured learner.
beforeEach(() => {
  // `force: true` makes rmSync succeed silently when the path is absent, so a
  // separate existence check before wiping leftovers is unnecessary.
  rmSync(TEST_DIR, { recursive: true, force: true });

  // Shared default configuration; individual tests build their own learner
  // when they need a different threshold or top_k.
  learner = new CrossTaskLearner({
    memory_path: TEST_DIR,
    top_k: 3,
    similarity_threshold: 0.7,
    max_age_days: 90,
  });
});
// Remove the on-disk memory directory so no state leaks into the next test.
afterEach(() => {
  // `force: true` already tolerates a missing path, so no existsSync guard is
  // needed around the removal.
  rmSync(TEST_DIR, { recursive: true, force: true });
});
// recordTaskCompletion: checks the returned record's fields, tags, and reflections.
describe('recordTaskCompletion', () => {
  it('should record a completed task', () => {
    const input = {
      task_description: 'Implement user authentication with JWT tokens',
      task_type: 'implementation',
      outcome: 'success',
      iterations: 3,
      final_quality: 0.85,
      key_learnings: [
        'JWT tokens should have short expiry times',
        'Always validate token signature',
      ],
      tags: ['auth', 'security', 'jwt'],
    };
    const recorded = learner.recordTaskCompletion(input);

    // The record must carry an id and echo back the supplied scalar fields.
    assert.ok(recorded.task_id);
    const expectedFields = [
      ['task_description', 'Implement user authentication with JWT tokens'],
      ['task_type', 'implementation'],
      ['outcome', 'success'],
      ['iterations', 3],
      ['final_quality', 0.85],
    ];
    for (const [field, expected] of expectedFields) {
      assert.equal(recorded[field], expected);
    }
    assert.equal(recorded.key_learnings.length, 2);

    // The learner's statistics should now count exactly one stored task.
    const { total_tasks: totalTasks } = learner.getStatistics();
    assert.equal(totalTasks, 1);
  });

  it('should auto-extract tags from description', () => {
    // No tags are supplied here; the test expects them on the returned record.
    const recorded = learner.recordTaskCompletion({
      task_description: 'Fix authentication bug in login flow',
      task_type: 'debugging',
      outcome: 'success',
    });

    for (const expectedTag of ['fix', 'debug']) {
      assert.ok(recorded.tags.includes(expectedTag));
    }
  });

  it('should record reflections', () => {
    const reflection = {
      iteration: 2,
      content: 'Adding index on user_id column improved performance by 10x',
      type: 'success_pattern',
      effectiveness: 'helpful',
    };
    const recorded = learner.recordTaskCompletion({
      task_description: 'Refactor database query performance',
      task_type: 'refactoring',
      outcome: 'success',
      reflections: [reflection],
    });

    const storedReflections = recorded.reflections;
    assert.equal(storedReflections.length, 1);
    assert.equal(storedReflections[0].type, 'success_pattern');
  });
});
// findSimilarTasks: keyword matching, similarity threshold, top_k cap, and ordering.
describe('findSimilarTasks', () => {
  beforeEach(() => {
    // Seed three past tasks of different types; the fixtures are rebuilt on
    // every run so each test hands fresh objects to the learner.
    const seedTasks = [
      {
        task_description: 'Implement JWT authentication for API endpoints',
        task_type: 'implementation',
        outcome: 'success',
        iterations: 3,
        final_quality: 0.85,
        key_learnings: ['Use bcrypt for password hashing'],
        tags: ['auth', 'security', 'api'],
      },
      {
        task_description: 'Fix bug in user login validation',
        task_type: 'debugging',
        outcome: 'success',
        iterations: 2,
        final_quality: 0.9,
        key_learnings: ['Validate email format before database query'],
        tags: ['fix', 'auth', 'debug'],
      },
      {
        task_description: 'Add documentation for payment processing',
        task_type: 'documentation',
        outcome: 'success',
        iterations: 1,
        final_quality: 0.95,
        key_learnings: ['Include error codes in API docs'],
        tags: ['document', 'api'],
      },
    ];
    for (const seed of seedTasks) {
      learner.recordTaskCompletion(seed);
    }
  });

  it('should find similar tasks based on keywords', () => {
    const query = 'Implement OAuth authentication for user login';
    const matches = learner.findSimilarTasks(query);

    // At least one authentication-related task is expected to match.
    assert.ok(matches.length > 0);
    const best = matches[0];
    assert.ok(best.similarity_score >= 0.7);

    const mentionsAuth =
      best.task.task_description.toLowerCase().includes('auth') ||
      best.task.tags.includes('auth');
    assert.ok(mentionsAuth);
  });

  it('should respect similarity threshold', () => {
    // A second learner over the same directory, but with a very high threshold.
    const strictLearner = new CrossTaskLearner({
      memory_path: TEST_DIR,
      similarity_threshold: 0.95,
    });
    strictLearner.recordTaskCompletion({
      task_description: 'Implement user authentication',
      task_type: 'implementation',
      outcome: 'success',
    });

    // The query differs from the stored descriptions; the test expects no
    // match to be returned at this threshold.
    const matches = strictLearner.findSimilarTasks('Add payment processing feature');
    assert.equal(matches.length, 0);
  });

  it('should limit results to top_k', () => {
    // top_k of 1 with a lowered threshold, searching the tasks seeded above.
    const limitedLearner = new CrossTaskLearner({
      memory_path: TEST_DIR,
      top_k: 1,
      similarity_threshold: 0.5,
    });

    const matches = limitedLearner.findSimilarTasks('Fix authentication bug in login');

    // Never more than one result.
    assert.ok(matches.length <= 1);
  });

  it('should sort by similarity score', () => {
    const matches = learner.findSimilarTasks('Implement authentication for login');

    // Compare each result with its predecessor; with fewer than two results
    // the loop body never runs and there is nothing to check.
    for (let i = 1; i < matches.length; i++) {
      assert.ok(matches[i - 1].similarity_score >= matches[i].similarity_score);
    }
  });
});
// getRelevantLearnings: aggregated learnings, reflections, deduplication, and summary text.
describe('getRelevantLearnings', () => {
  beforeEach(() => {
    // One stored authentication task with two learnings and a helpful reflection.
    const jwtTask = {
      task_description: 'Implement JWT authentication',
      task_type: 'implementation',
      outcome: 'success',
      iterations: 3,
      final_quality: 0.85,
      key_learnings: [
        'JWT tokens should expire after 15 minutes',
        'Store refresh tokens securely',
      ],
      reflections: [
        {
          iteration: 2,
          content: 'Using RS256 instead of HS256 for better security',
          type: 'strategy_change',
          effectiveness: 'helpful',
        },
      ],
      tags: ['auth', 'security'],
    };
    learner.recordTaskCompletion(jwtTask);
  });

  it('should return aggregated learnings', () => {
    const result = learner.getRelevantLearnings('Add OAuth authentication for user login');

    // Similar tasks, key learnings, and the summary must all be non-empty.
    assert.ok(result.similar_tasks.length > 0);
    assert.ok(result.key_learnings.length > 0);
    assert.ok(result.context_summary.length > 0);
  });

  it('should include reflections when enabled', () => {
    const result = learner.getRelevantLearnings('Implement authentication system');

    assert.ok(result.reflections.length > 0);
    const summaryHasSection = result.context_summary.includes('Helpful Reflections');
    assert.ok(summaryHasSection);
  });

  it('should return empty when no similar tasks', () => {
    const result = learner.getRelevantLearnings(
      'Completely unrelated task about graphics rendering'
    );

    assert.equal(result.similar_tasks.length, 0);
    assert.equal(result.key_learnings.length, 0);
    assert.equal(result.context_summary, '');
  });

  it('should deduplicate key learnings', () => {
    // Record a second task whose first learning repeats one stored in the hook.
    learner.recordTaskCompletion({
      task_description: 'Fix authentication token expiry bug',
      task_type: 'debugging',
      outcome: 'success',
      key_learnings: [
        'JWT tokens should expire after 15 minutes',
        'Always validate token expiry time',
      ],
      tags: ['auth', 'fix'],
    });

    const result = learner.getRelevantLearnings('Implement JWT authentication');

    // The repeated learning may appear only once in the aggregate.
    const occurrences = result.key_learnings.filter(
      (learning) => learning === 'JWT tokens should expire after 15 minutes'
    );
    assert.equal(occurrences.length, 1);
  });

  it('should generate context summary with similar tasks', () => {
    const { context_summary: summary } = learner.getRelevantLearnings(
      'Implement authentication'
    );

    for (const heading of ['Cross-Task Learning Context', 'Similar Past Tasks']) {
      assert.ok(summary.includes(heading));
    }
  });
});
// pruneOldEntries: entries past the age limit are removed, recent ones are kept.
describe('pruneOldEntries', () => {
  it('should remove entries older than max_age_days', () => {
    const recorded = learner.recordTaskCompletion({
      task_description: 'Test task',
      task_type: 'testing',
      outcome: 'success',
    });
    const taskId = recorded.task_id;

    // Back-date the loaded task by 100 days.
    const loaded = learner.loadTask(taskId);
    const hundredDaysMs = 100 * 24 * 60 * 60 * 1000;
    loaded.timestamp = new Date(Date.now() - hundredDaysMs).toISOString();

    // Mirror the back-dated timestamp onto the matching index entry and
    // save the index; nothing is written when no entry is found.
    const indexEntry = learner.index.tasks.find((candidate) => candidate.task_id === taskId);
    if (indexEntry) {
      indexEntry.timestamp = loaded.timestamp;
      learner.saveIndex();
    }

    // With a 90-day limit the single back-dated entry should be dropped.
    const { removed, remaining } = learner.pruneOldEntries(90);
    assert.equal(removed, 1);
    assert.equal(remaining, 0);
  });

  it('should keep recent entries', () => {
    learner.recordTaskCompletion({
      task_description: 'Recent task',
      task_type: 'implementation',
      outcome: 'success',
    });

    // A task recorded just now is well inside the 90-day window.
    const { removed, remaining } = learner.pruneOldEntries(90);
    assert.equal(removed, 0);
    assert.equal(remaining, 1);
  });
});
// getStatistics: empty-memory defaults and per-type / per-outcome counts.
describe('getStatistics', () => {
  it('should return empty stats for no tasks', () => {
    const summary = learner.getStatistics();

    assert.equal(summary.total_tasks, 0);
    assert.deepEqual(summary.tasks_by_type, {});
    assert.deepEqual(summary.tasks_by_outcome, {});
    assert.equal(summary.oldest_task, null);
    assert.equal(summary.newest_task, null);
  });

  it('should aggregate statistics correctly', () => {
    // [description, type, outcome] for the three tasks, recorded in this order.
    const rows = [
      ['Task 1', 'implementation', 'success'],
      ['Task 2', 'debugging', 'success'],
      ['Task 3', 'implementation', 'partial'],
    ];
    for (const [task_description, task_type, outcome] of rows) {
      learner.recordTaskCompletion({ task_description, task_type, outcome });
    }

    const summary = learner.getStatistics();
    assert.equal(summary.total_tasks, 3);

    const byType = summary.tasks_by_type;
    assert.equal(byType.implementation, 2);
    assert.equal(byType.debugging, 1);

    const byOutcome = summary.tasks_by_outcome;
    assert.equal(byOutcome.success, 2);
    assert.equal(byOutcome.partial, 1);

    // Both date boundaries must be populated once tasks exist.
    assert.ok(summary.oldest_task);
    assert.ok(summary.newest_task);
  });
});
// searchByTags: lookup by a single tag and by any of several tags.
describe('searchByTags', () => {
  beforeEach(() => {
    // Two tasks whose tag sets do not overlap.
    const taggedTasks = [
      {
        task_description: 'Task with auth tag',
        task_type: 'implementation',
        outcome: 'success',
        tags: ['auth', 'security'],
      },
      {
        task_description: 'Task with test tag',
        task_type: 'testing',
        outcome: 'success',
        tags: ['test', 'coverage'],
      },
    ];
    for (const tagged of taggedTasks) {
      learner.recordTaskCompletion(tagged);
    }
  });

  it('should find tasks by tag', () => {
    const found = learner.searchByTags(['auth']);

    assert.equal(found.length, 1);
    const onlyHit = found[0];
    assert.ok(onlyHit.tags.includes('auth'));
  });

  it('should find tasks matching any tag', () => {
    // One tag from each stored task, so both should be returned.
    const found = learner.searchByTags(['auth', 'test']);

    assert.equal(found.length, 2);
  });
});
// searchByType: lookup restricted to a single task_type.
describe('searchByType', () => {
  beforeEach(() => {
    // One task per type so a type filter has exactly one hit.
    const typedTasks = [
      ['Implementation task', 'implementation'],
      ['Debug task', 'debugging'],
    ];
    for (const [task_description, task_type] of typedTasks) {
      learner.recordTaskCompletion({ task_description, task_type, outcome: 'success' });
    }
  });

  it('should find tasks by type', () => {
    const found = learner.searchByType('implementation');

    assert.equal(found.length, 1);
    const [{ task_type: foundType }] = found;
    assert.equal(foundType, 'implementation');
  });
});
// export / import: snapshot shape and a clear-then-restore round trip.
describe('export and import', () => {
  beforeEach(() => {
    // A single task with one learning is enough to exercise the round trip.
    const exportable = {
      task_description: 'Task to export',
      task_type: 'implementation',
      outcome: 'success',
      key_learnings: ['Learning 1'],
    };
    learner.recordTaskCompletion(exportable);
  });

  it('should export memory as JSON', () => {
    const snapshot = learner.export();

    // Metadata fields must be present alongside the task list.
    assert.ok(snapshot.version);
    assert.ok(snapshot.exported_at);
    assert.equal(snapshot.total_tasks, 1);
    assert.ok(Array.isArray(snapshot.tasks));
    const firstExported = snapshot.tasks[0];
    assert.equal(firstExported.task_description, 'Task to export');
  });

  it('should import memory from JSON', () => {
    const snapshot = learner.export();

    // Wipe the memory and confirm it is empty before restoring the snapshot.
    learner.clear();
    assert.equal(learner.getStatistics().total_tasks, 0);

    learner.import(snapshot);
    assert.equal(learner.getStatistics().total_tasks, 1);

    const restored = learner.getAllTasks();
    assert.equal(restored[0].task_description, 'Task to export');
  });
});
// clear: both the statistics and the task list must be empty afterwards.
describe('clear', () => {
  it('should clear all memory', () => {
    const disposable = {
      task_description: 'Task to clear',
      task_type: 'implementation',
      outcome: 'success',
    };
    learner.recordTaskCompletion(disposable);

    // Confirm the task was stored before wiping.
    const countBefore = learner.getStatistics().total_tasks;
    assert.equal(countBefore, 1);

    learner.clear();

    const countAfter = learner.getStatistics().total_tasks;
    assert.equal(countAfter, 0);
    const leftover = learner.getAllTasks();
    assert.equal(leftover.length, 0);
  });
});
// Similarity scoring as observed through findSimilarTasks: close wording scores
// high, unrelated wording yields no match or a low score.
describe('similarity calculation', () => {
  it('should calculate high similarity for similar descriptions', () => {
    const stored = {
      task_description: 'Implement user authentication with JWT',
      task_type: 'implementation',
      outcome: 'success',
      tags: ['auth', 'jwt', 'security'],
    };
    learner.recordTaskCompletion(stored);

    // Same vocabulary in a different word order.
    const matches = learner.findSimilarTasks('Implement JWT authentication for users');

    assert.ok(matches.length > 0);
    const topScore = matches[0].similarity_score;
    assert.ok(topScore > 0.7);
  });

  it('should calculate low similarity for dissimilar descriptions', () => {
    const stored = {
      task_description: 'Implement user authentication',
      task_type: 'implementation',
      outcome: 'success',
      tags: ['auth', 'security'],
    };
    learner.recordTaskCompletion(stored);

    const matches = learner.findSimilarTasks('Refactor database schema for analytics');

    // Acceptable outcomes: nothing returned, or a best score below 0.5.
    const noStrongMatch = matches.length === 0 || matches[0].similarity_score < 0.5;
    assert.ok(noStrongMatch);
  });
});
});