/*
 * @boundless-oss/atlas
 * Version:
 * Atlas - MCP Server for comprehensive startup project management
 * 1,263 lines (1,143 loc) • 41.2 kB
 * text/typescript
 */
import { JSONSchema7 } from 'json-schema';
import { randomUUID } from 'crypto';
import { createTool, createSuccessResult, createErrorResult } from '../../core/tool-framework.js';
import { ToolRegistration, RequestContext } from '../../core/types.js';
import { TestRunner } from './test-runner.js';
import { FSWatcher, watch } from 'fs';
/**
* Testing Framework Tools - 12-Factor MCP Implementation
*
* Implements Factor 2: Deterministic Execution with structured outputs
* Implements Factor 3: Stateless Processes with RequestContext
* Implements Factor 4: Structured Outputs for LLM consumption
*/
// Input type interfaces
/**
 * Options accepted by the `run_tests` tool. Every field is optional; the whole
 * object is handed to the test runner's `runTests` call.
 */
interface RunTestsInput {
/** Specific test files to run. */
files?: string[];
/** Test name pattern to match. */
pattern?: string;
/** Run tests in watch mode (schema default: `false`). */
watch?: boolean;
/** Generate a coverage report (schema default: `false`). */
coverage?: boolean;
/** Stop on the first test failure (schema default: `false`). */
bail?: boolean;
/** Verbose output (schema default: `false`). */
verbose?: boolean;
/** Update test snapshots (schema default: `false`). */
updateSnapshots?: boolean;
/** Run tests in parallel (schema default: `true`). */
parallel?: boolean;
/** Maximum number of worker processes. */
maxWorkers?: number;
/** Test timeout in milliseconds. */
timeout?: number;
/** Pattern used to filter tests (described in the schema as Mocha/RSpec style). */
grep?: string;
/** Tags stored alongside the saved test run. */
tags?: string[];
}
/** Filters accepted by the `track_test_results` tool. */
interface TrackTestResultsInput {
/** Only return runs recorded on this git branch. */
branch?: string;
/** Only return runs recorded for this git commit. */
commit?: string;
/** Only return runs carrying at least one of these tags. */
tags?: string[];
/** Compare the latest matching run with the project's baseline (schema default: `false`). */
compareWithBaseline?: boolean;
/** Maximum number of results to return (schema default: 20). */
limit?: number;
}
/** Options accepted by the `analyze_test_coverage` tool. */
interface AnalyzeTestCoverageInput {
/** Target line-coverage percentage (schema default: 80). */
targetCoverage?: number;
/** Level of detail for the coverage analysis (schema default: `'file'`). */
granularity?: 'project' | 'file' | 'function';
/** Include the list of uncovered code (schema default: `true`). */
includeUncovered?: boolean;
}
/** Options accepted by the `detect_flaky_tests` tool. */
interface DetectFlakyTestsInput {
/** Minimum number of recorded runs a test needs before it is analysed (schema default: 5). */
minRuns?: number;
/** Failure-rate threshold expressed as a fraction between 0 and 1 (schema default: 0.2). */
threshold?: number;
}
/** Options accepted by the `watch_tests` tool. */
interface WatchTestsInput {
/** File patterns to watch (schema default: test files, spec files and everything under `src`). */
patterns?: string[];
/** Test command recorded for the watcher; the handler falls back to `'npm test'`. */
command?: string;
/** Automatically rerun tests on file change; anything other than `false` enables it. */
autoRerun?: boolean;
/** Debounce delay in milliseconds (schema default: 1000). */
debounceMs?: number;
}
/** Input accepted by the `stop_test_watcher` tool. */
interface StopTestWatcherInput {
/** ID of the watcher to stop, as returned by `watch_tests`. */
watcherId: string;
}
/** Options accepted by the `set_test_baseline` tool. */
interface SetTestBaselineInput {
/** Test result ID to set as baseline; the project's latest result is used when omitted. */
resultId?: string;
/** Additional tags for the baseline (schema default: `['baseline']`). */
tags?: string[];
}
// Module state management

/**
 * Test runner shared by the tools in this module. It starts unset; the tools
 * that need it construct it on first use.
 */
let testRunner: TestRunner;

/**
 * File-system watchers currently held by this process, keyed by watcher id.
 * `watch_tests` adds entries and `stop_test_watcher` closes and removes them.
 * The map itself is never replaced, so the binding is `const`.
 */
const activeWatchers = new Map<string, { watcher: FSWatcher; config: any }>();
/**
 * Reads the current git branch and the 7-character abbreviated commit hash.
 *
 * Best effort: when either git command fails (git missing, not a repository,
 * detached tooling, ...) an empty object is returned and the run is stored
 * without git metadata.
 */
async function readGitInfoForRun(): Promise<{ branch?: string; commit?: string }> {
  try {
    const { exec } = await import('child_process');
    const { promisify } = await import('util');
    const execAsync = promisify(exec);
    const { stdout: branchOut } = await execAsync('git rev-parse --abbrev-ref HEAD');
    const { stdout: commitOut } = await execAsync('git rev-parse HEAD');
    return { branch: branchOut.trim(), commit: commitOut.trim().substring(0, 7) };
  } catch {
    // Git info not available
    return {};
  }
}

/**
 * Serializes an assertion's expected/actual value for storage.
 *
 * Only `undefined` and `null` are treated as "no value"; falsy values such as
 * `0`, `false` and `''` are real assertion operands and must be kept.
 *
 * @returns the JSON text, or `null` when there is nothing to store.
 */
function serializeAssertionValue(value: unknown): string | null {
  if (value === undefined || value === null) {
    return null;
  }
  // JSON.stringify yields undefined for functions/symbols.
  const encoded: string | undefined = JSON.stringify(value);
  return encoded ?? null;
}

/**
 * Tool `run_tests`: runs the test suite through the shared `TestRunner` and
 * persists the outcome.
 *
 * Steps, in order:
 *  1. run the tests with the caller's options;
 *  2. look up git branch/commit (best effort);
 *  3. insert the run header into `test_results` (a failure here aborts with
 *     `DATABASE_ERROR`);
 *  4. insert every suite, test case and per-file coverage row;
 *  5. update `flaky_tests`: existing rows get their counters and the last 10
 *     outcomes refreshed, and a new row is created the first time a test fails;
 *  6. return the summary, the failed tests and a few insight strings.
 *
 * Any thrown error is converted into an `EXECUTION_ERROR` result.
 */
const runTestsTool = createTool<RunTestsInput, any>({
  name: 'run_tests',
  description: 'Execute test suite with detailed reporting and coverage analysis',
  category: 'testing-framework',
  inputSchema: {
    type: 'object',
    properties: {
      files: {
        type: 'array',
        items: { type: 'string' },
        description: 'Specific test files to run'
      },
      pattern: {
        type: 'string',
        description: 'Test name pattern to match'
      },
      watch: {
        type: 'boolean',
        description: 'Run tests in watch mode',
        default: false
      },
      coverage: {
        type: 'boolean',
        description: 'Generate coverage report',
        default: false
      },
      bail: {
        type: 'boolean',
        description: 'Stop on first test failure',
        default: false
      },
      verbose: {
        type: 'boolean',
        description: 'Verbose output',
        default: false
      },
      updateSnapshots: {
        type: 'boolean',
        description: 'Update test snapshots',
        default: false
      },
      parallel: {
        type: 'boolean',
        description: 'Run tests in parallel',
        default: true
      },
      maxWorkers: {
        type: 'number',
        description: 'Maximum number of worker processes'
      },
      timeout: {
        type: 'number',
        description: 'Test timeout in milliseconds'
      },
      grep: {
        type: 'string',
        description: 'Pattern to filter tests (Mocha/RSpec)'
      },
      tags: {
        type: 'array',
        items: { type: 'string' },
        description: 'Tags to associate with this test run'
      }
    },
    additionalProperties: false
  } as JSONSchema7,
  async execute(input: RunTestsInput, context: RequestContext) {
    try {
      if (!testRunner) {
        testRunner = new TestRunner();
      }
      const projectId = context.projectId || 'default';
      const resultId = `test-${randomUUID()}`;
      const now = Date.now();

      // Run tests
      const result = await testRunner.runTests(input);

      // Get current git info if available
      const { branch, commit } = await readGitInfoForRun();

      // Save test result
      const testResult = await context.db.run(
        `INSERT INTO test_results
        (id, project_id, timestamp, branch, commit, framework, command, duration,
        summary_total, summary_passed, summary_failed, summary_skipped, summary_pending,
        summary_success_rate, coverage_data, tags, created_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
        [
          resultId,
          projectId,
          now,
          branch || null,
          commit || null,
          result.framework,
          result.command,
          result.duration,
          result.summary.total,
          result.summary.passed,
          result.summary.failed,
          result.summary.skipped,
          result.summary.pending,
          result.summary.successRate,
          result.coverage ? JSON.stringify(result.coverage) : null,
          JSON.stringify(input.tags || []),
          now
        ]
      );
      if (!testResult.success) {
        return createErrorResult({
          code: 'DATABASE_ERROR',
          message: 'Failed to save test result',
          details: { error: testResult.error },
          category: 'system'
        });
      }

      // Save test suites and cases
      for (const suite of result.suites) {
        const suiteId = `suite-${randomUUID()}`;
        await context.db.run(
          `INSERT INTO test_suites
          (id, result_id, name, path, status, duration, timestamp, created_at)
          VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
          [
            suiteId,
            resultId,
            suite.name,
            suite.path,
            suite.status,
            // `??` keeps a genuine 0 ms duration instead of turning it into NULL.
            suite.duration ?? null,
            now,
            now
          ]
        );

        // Save test cases
        for (const test of suite.tests) {
          const caseId = `case-${randomUUID()}`;
          await context.db.run(
            `INSERT INTO test_cases
            (id, suite_id, result_id, name, status, duration, error_message, error_stack,
            error_expected, error_actual, error_type, assertions, skipped, tags, created_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
            [
              caseId,
              suiteId,
              resultId,
              test.name,
              test.status,
              test.duration ?? null,
              test.error?.message || null,
              test.error?.stack || null,
              // Expected/actual values of 0, false or '' are meaningful and must be stored.
              serializeAssertionValue(test.error?.expected),
              serializeAssertionValue(test.error?.actual),
              test.error?.type || null,
              test.assertions ?? null,
              test.skipped || false,
              JSON.stringify(test.tags || []),
              now
            ]
          );
        }
      }

      // Save coverage data if available
      if (result.coverage && result.coverage.files) {
        for (const file of result.coverage.files) {
          const coverageId = `coverage-${randomUUID()}`;
          await context.db.run(
            `INSERT INTO test_coverage_files
            (id, result_id, path, lines_total, lines_covered, lines_skipped, lines_percentage,
            statements_total, statements_covered, statements_skipped, statements_percentage,
            functions_total, functions_covered, functions_skipped, functions_percentage,
            branches_total, branches_covered, branches_skipped, branches_percentage,
            uncovered_lines, created_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
            [
              coverageId,
              resultId,
              file.path,
              file.lines.total,
              file.lines.covered,
              file.lines.skipped,
              file.lines.percentage,
              file.statements.total,
              file.statements.covered,
              file.statements.skipped,
              file.statements.percentage,
              file.functions.total,
              file.functions.covered,
              file.functions.skipped,
              file.functions.percentage,
              file.branches.total,
              file.branches.covered,
              file.branches.skipped,
              file.branches.percentage,
              JSON.stringify(file.uncoveredLines || []),
              now
            ]
          );
        }
      }

      // Update flaky test tracking
      for (const suite of result.suites) {
        for (const test of suite.tests) {
          // Check if flaky test exists
          const existing = await context.db.get(
            'SELECT * FROM flaky_tests WHERE project_id = ? AND test_name = ? AND suite_name = ?',
            [projectId, test.name, suite.name]
          );
          if (existing.success && existing.data) {
            // Update existing flaky test
            const totalRuns = existing.data.total_runs + 1;
            const failures = existing.data.failures + (test.status === 'failed' ? 1 : 0);
            const passes = existing.data.passes + (test.status === 'passed' ? 1 : 0);
            const failureRate = failures / totalRuns;
            const recentRuns = JSON.parse(existing.data.recent_runs || '[]');
            recentRuns.push({
              passed: test.status === 'passed',
              timestamp: new Date().toISOString()
            });
            // Keep only the 10 most recent outcomes.
            if (recentRuns.length > 10) {
              recentRuns.shift();
            }
            await context.db.run(
              `UPDATE flaky_tests
              SET failure_rate = ?, total_runs = ?, failures = ?, passes = ?,
              last_seen = ?, recent_runs = ?, updated_at = ?
              WHERE id = ?`,
              [
                failureRate,
                totalRuns,
                failures,
                passes,
                now,
                JSON.stringify(recentRuns),
                now,
                existing.data.id
              ]
            );
          } else if (test.status === 'failed') {
            // Create new flaky test entry for failed test
            const flakyId = `flaky-${randomUUID()}`;
            await context.db.run(
              `INSERT INTO flaky_tests
              (id, project_id, test_name, suite_name, failure_rate, total_runs,
              failures, passes, first_seen, last_seen, recent_runs, error_patterns,
              created_at, updated_at)
              VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
              [
                flakyId,
                projectId,
                test.name,
                suite.name,
                1.0, // 100% failure rate initially
                1,
                1,
                0,
                now,
                now,
                JSON.stringify([{ passed: false, timestamp: new Date().toISOString() }]),
                JSON.stringify([test.error?.message || '']),
                now,
                now
              ]
            );
          }
        }
      }

      // Generate test summary
      const failedTests = result.suites.flatMap((s: any) =>
        s.tests.filter((t: any) => t.status === 'failed').map((t: any) => ({ suite: s.name, test: t }))
      );
      return createSuccessResult({
        result: {
          id: resultId,
          projectId,
          timestamp: new Date(now).toISOString(),
          branch,
          commit,
          summary: result.summary,
          coverage: result.coverage
        },
        failedTests: failedTests.map(({ suite, test }: { suite: any; test: any }) => ({
          suite,
          name: test.name,
          error: test.error?.message
        })),
        message: result.summary.failed === 0
          ? `✅ All ${result.summary.total} tests passed!`
          : `❌ ${result.summary.failed} of ${result.summary.total} tests failed`,
        insights: [
          `Success rate: ${result.summary.successRate.toFixed(1)}%`,
          `Execution time: ${(result.duration / 1000).toFixed(2)}s`,
          result.coverage ? `Coverage: ${result.coverage.lines.percentage.toFixed(1)}%` : null
        ].filter(Boolean)
      });
    } catch (error) {
      return createErrorResult({
        code: 'EXECUTION_ERROR',
        message: `Test execution failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
        category: 'execution'
      });
    }
  }
});
/** Direction and size of a metric's movement between two windows of runs. */
interface TestTrendSummary {
  improving: boolean;
  stable: boolean;
  degrading: boolean;
  changePercentage: number;
}

/** Baseline-vs-latest comparison returned by `track_test_results`. */
interface TestBaselineComparison {
  baseline: { id: string; timestamp: string; successRate: number; coverage: number | null };
  current: { id: string; timestamp: string; successRate: number; coverage: number | null };
  improvements: string[];
  regressions: string[];
}

/** Escapes the LIKE wildcards (`%`, `_`) and the escape character itself. */
function escapeTagForLike(value: string): string {
  return value.replace(/[\\%_]/g, '\\$&');
}

/** Keeps only finite numbers, dropping NULL columns and unparsable values. */
function finiteNumbersOnly(values: unknown[]): number[] {
  return values.filter((v): v is number => typeof v === 'number' && Number.isFinite(v));
}

/**
 * Extracts `lines.percentage` from a stored coverage JSON string.
 *
 * @returns the percentage, or `null` when the column is empty, is not valid
 *   JSON, or does not contain a numeric `lines.percentage`.
 */
function storedLinesCoverage(raw: unknown): number | null {
  if (typeof raw !== 'string' || raw === '') {
    return null;
  }
  try {
    const percentage = JSON.parse(raw)?.lines?.percentage;
    return typeof percentage === 'number' && Number.isFinite(percentage) ? percentage : null;
  } catch {
    return null;
  }
}

/**
 * Compares the average of a recent window with the average of an older one.
 *
 * `changePercentage` is the relative change of the recent average against the
 * older average. A change within ±1% counts as stable. When the older average
 * is 0 a relative change is undefined, so any increase is reported as +100.
 *
 * @param recent values from the most recent runs
 * @param older values from the oldest runs in the result set
 * @param higherIsBetter `true` for success rate/coverage, `false` for duration
 * @returns all flags `false` when either window has no usable value
 */
function summarizeTestTrend(recent: number[], older: number[], higherIsBetter: boolean): TestTrendSummary {
  const trend: TestTrendSummary = { improving: false, stable: false, degrading: false, changePercentage: 0 };
  if (recent.length === 0 || older.length === 0) {
    return trend;
  }
  const mean = (values: number[]): number => values.reduce((sum, v) => sum + v, 0) / values.length;
  const recentAvg = mean(recent);
  const olderAvg = mean(older);
  const change = olderAvg === 0
    ? Math.sign(recentAvg) * 100
    : ((recentAvg - olderAvg) / Math.abs(olderAvg)) * 100;
  const benefit = higherIsBetter ? change : -change;
  trend.changePercentage = change;
  trend.improving = benefit > 1;
  trend.degrading = benefit < -1;
  trend.stable = Math.abs(change) <= 1;
  return trend;
}

/**
 * Tool `track_test_results`: lists stored test runs for the project (newest
 * first), optionally filtered by branch, commit and tags, and derives trends.
 *
 * Trends compare the newest runs with the oldest runs of the returned set using
 * two non-overlapping windows of up to five runs each; with fewer than two runs
 * every trend flag stays `false`. When `compareWithBaseline` is set, the latest
 * returned run is compared with the project's most recent baseline.
 *
 * Database failures yield `DATABASE_ERROR`; thrown errors yield
 * `EXECUTION_ERROR`.
 */
const trackTestResultsTool = createTool<TrackTestResultsInput, any>({
  name: 'track_test_results',
  description: 'View test result history and trends over time',
  category: 'testing-framework',
  inputSchema: {
    type: 'object',
    properties: {
      branch: {
        type: 'string',
        description: 'Filter by git branch'
      },
      commit: {
        type: 'string',
        description: 'Filter by git commit'
      },
      tags: {
        type: 'array',
        items: { type: 'string' },
        description: 'Filter by tags'
      },
      compareWithBaseline: {
        type: 'boolean',
        description: 'Compare latest results with baseline',
        default: false
      },
      limit: {
        type: 'number',
        description: 'Maximum number of results to return',
        default: 20
      }
    },
    additionalProperties: false
  } as JSONSchema7,
  async execute(input: TrackTestResultsInput, context: RequestContext) {
    try {
      const projectId = context.projectId || 'default';

      // Build query
      let query = `
        SELECT r.*,
        (SELECT COUNT(*) FROM test_cases WHERE result_id = r.id AND status = 'failed') as failed_count
        FROM test_results r
        WHERE r.project_id = ?
      `;
      const params: any[] = [projectId];
      if (input.branch) {
        query += ' AND r.branch = ?';
        params.push(input.branch);
      }
      if (input.commit) {
        query += ' AND r.commit = ?';
        params.push(input.commit);
      }
      if (input.tags && input.tags.length > 0) {
        // Tags are stored as a JSON array, so match the JSON-encoded tag text.
        // Wildcards inside the tag are escaped so "unit_test" does not match "unit-test".
        const tagConditions = input.tags.map(() => "r.tags LIKE ? ESCAPE '\\'").join(' OR ');
        query += ` AND (${tagConditions})`;
        input.tags.forEach(tag => params.push(`%${escapeTagForLike(JSON.stringify(tag))}%`));
      }
      query += ' ORDER BY r.timestamp DESC LIMIT ?';
      params.push(input.limit || 20);

      const results = await context.db.all(query, params);
      if (!results.success) {
        return createErrorResult({
          code: 'DATABASE_ERROR',
          message: 'Failed to fetch test results',
          details: { error: results.error },
          category: 'system'
        });
      }
      const rows: any[] = results.data;
      if (rows.length === 0) {
        return createSuccessResult({
          results: [],
          totalCount: 0,
          message: 'No test results found matching the criteria'
        });
      }

      // Calculate trends
      const trends: { successRate: TestTrendSummary; duration: TestTrendSummary; coverage: TestTrendSummary } = {
        successRate: { improving: false, stable: false, degrading: false, changePercentage: 0 },
        duration: { improving: false, stable: false, degrading: false, changePercentage: 0 },
        coverage: { improving: false, stable: false, degrading: false, changePercentage: 0 }
      };
      if (rows.length >= 2) {
        // Half the result set (capped at 5) per window, so the windows never share a run.
        const windowSize = Math.min(5, Math.floor(rows.length / 2));
        const recent = rows.slice(0, windowSize);
        const older = rows.slice(-windowSize);
        trends.successRate = summarizeTestTrend(
          finiteNumbersOnly(recent.map((r: any) => r.summary_success_rate)),
          finiteNumbersOnly(older.map((r: any) => r.summary_success_rate)),
          true
        );
        trends.duration = summarizeTestTrend(
          finiteNumbersOnly(recent.map((r: any) => r.duration)),
          finiteNumbersOnly(older.map((r: any) => r.duration)),
          false
        );
        trends.coverage = summarizeTestTrend(
          finiteNumbersOnly(recent.map((r: any) => storedLinesCoverage(r.coverage_data))),
          finiteNumbersOnly(older.map((r: any) => storedLinesCoverage(r.coverage_data))),
          true
        );
      }

      // Get baseline comparison if requested
      let comparison: TestBaselineComparison | null = null;
      if (input.compareWithBaseline) {
        const baseline = await context.db.get(
          'SELECT * FROM test_baselines WHERE project_id = ? ORDER BY created_at DESC LIMIT 1',
          [projectId]
        );
        if (baseline.success && baseline.data) {
          const baselineResult = await context.db.get(
            'SELECT * FROM test_results WHERE id = ?',
            [baseline.data.result_id]
          );
          if (baselineResult.success && baselineResult.data) {
            const latest = rows[0];
            const baselineRow = baselineResult.data;
            const built: TestBaselineComparison = {
              baseline: {
                id: baselineRow.id,
                timestamp: new Date(baselineRow.timestamp).toISOString(),
                successRate: baselineRow.summary_success_rate,
                coverage: storedLinesCoverage(baselineRow.coverage_data)
              },
              current: {
                id: latest.id,
                timestamp: new Date(latest.timestamp).toISOString(),
                successRate: latest.summary_success_rate,
                coverage: storedLinesCoverage(latest.coverage_data)
              },
              improvements: [],
              regressions: []
            };
            if (latest.summary_success_rate > baselineRow.summary_success_rate) {
              built.improvements.push(
                `Success rate improved from ${baselineRow.summary_success_rate.toFixed(1)}% to ${latest.summary_success_rate.toFixed(1)}%`
              );
            } else if (latest.summary_success_rate < baselineRow.summary_success_rate) {
              built.regressions.push(
                `Success rate decreased from ${baselineRow.summary_success_rate.toFixed(1)}% to ${latest.summary_success_rate.toFixed(1)}%`
              );
            }
            // Explicit null checks: 0% coverage is a value that must still be compared.
            const baselineCoverage = built.baseline.coverage;
            const currentCoverage = built.current.coverage;
            if (baselineCoverage !== null && currentCoverage !== null) {
              if (currentCoverage > baselineCoverage) {
                built.improvements.push(
                  `Coverage improved from ${baselineCoverage.toFixed(1)}% to ${currentCoverage.toFixed(1)}%`
                );
              } else if (currentCoverage < baselineCoverage) {
                built.regressions.push(
                  `Coverage decreased from ${baselineCoverage.toFixed(1)}% to ${currentCoverage.toFixed(1)}%`
                );
              }
            }
            comparison = built;
          }
        }
      }

      // Format results
      const formattedResults = rows.map((r: any) => ({
        id: r.id,
        timestamp: new Date(r.timestamp).toISOString(),
        branch: r.branch,
        commit: r.commit,
        summary: {
          total: r.summary_total,
          passed: r.summary_passed,
          failed: r.summary_failed,
          skipped: r.summary_skipped,
          pending: r.summary_pending,
          successRate: r.summary_success_rate
        },
        duration: r.duration,
        coverage: r.coverage_data ? JSON.parse(r.coverage_data) : null,
        tags: JSON.parse(r.tags || '[]')
      }));
      const regressionCount = comparison ? comparison.regressions.length : 0;
      return createSuccessResult({
        results: formattedResults,
        totalCount: formattedResults.length,
        trends,
        comparison,
        message: `Found ${formattedResults.length} test result(s)`,
        insights: [
          trends.successRate.improving ? 'Test success rate is improving' : null,
          trends.successRate.degrading ? 'Test success rate is degrading - investigate failures' : null,
          regressionCount > 0 ? 'Performance regressions detected compared to baseline' : null
        ].filter(Boolean)
      });
    } catch (error) {
      return createErrorResult({
        code: 'EXECUTION_ERROR',
        message: `Failed to track test results: ${error instanceof Error ? error.message : 'Unknown error'}`,
        category: 'execution'
      });
    }
  }
});
/**
 * Tool `analyze_test_coverage`: inspects the project's most recent test run
 * that has coverage data and reports how it compares with a target.
 *
 * The result contains the overall percentages, whether line coverage meets
 * `targetCoverage` (80 when omitted), the gap to the target, per-file counts,
 * the 10 files with the lowest line coverage and prioritized recommendations.
 * Unless `includeUncovered` is `false`, each listed file also carries its
 * stored uncovered-line list as `uncoveredLineList`.
 *
 * A failed database read returns `DATABASE_ERROR`; the "no coverage data"
 * message is reserved for the case where the query succeeded but found nothing.
 * Thrown errors are converted to `EXECUTION_ERROR`.
 */
const analyzeTestCoverageTool = createTool<AnalyzeTestCoverageInput, any>({
  name: 'analyze_test_coverage',
  description: 'Analyze test coverage and identify areas needing more tests',
  category: 'testing-framework',
  inputSchema: {
    type: 'object',
    properties: {
      targetCoverage: {
        type: 'number',
        description: 'Target coverage percentage',
        default: 80
      },
      granularity: {
        type: 'string',
        enum: ['project', 'file', 'function'],
        description: 'Level of detail for coverage analysis',
        default: 'file'
      },
      includeUncovered: {
        type: 'boolean',
        description: 'Include list of uncovered code',
        default: true
      }
    },
    additionalProperties: false
  } as JSONSchema7,
  async execute(input: AnalyzeTestCoverageInput, context: RequestContext) {
    try {
      const projectId = context.projectId || 'default';

      // Get latest test result with coverage
      const latest = await context.db.get(
        `SELECT * FROM test_results
        WHERE project_id = ? AND coverage_data IS NOT NULL
        ORDER BY timestamp DESC LIMIT 1`,
        [projectId]
      );
      // A failed query is an error, not "no data".
      if (!latest.success) {
        return createErrorResult({
          code: 'DATABASE_ERROR',
          message: 'Failed to fetch coverage data',
          details: { error: latest.error },
          category: 'system'
        });
      }
      if (!latest.data) {
        return createSuccessResult({
          message: '⚠️ No coverage data available. Run tests with coverage=true option.',
          recommendation: 'Use run_tests with coverage=true to generate coverage data'
        });
      }
      const coverage = JSON.parse(latest.data.coverage_data);
      // `??` so that an explicit target of 0 is respected.
      const targetCoverage = input.targetCoverage ?? 80;
      const includeUncovered = input.includeUncovered !== false;

      // Get file-level coverage details (worst files first)
      const fileCoverage = await context.db.all(
        'SELECT * FROM test_coverage_files WHERE result_id = ? ORDER BY lines_percentage ASC',
        [latest.data.id]
      );
      if (!fileCoverage.success) {
        return createErrorResult({
          code: 'DATABASE_ERROR',
          message: 'Failed to fetch coverage details',
          details: { error: fileCoverage.error },
          category: 'system'
        });
      }

      // Analyze coverage: classify each file once.
      const files: any[] = fileCoverage.data;
      const belowTarget = files.filter((f: any) => f.lines_percentage < targetCoverage);
      const untestedCount = files.filter((f: any) => f.lines_percentage === 0).length;
      const lowCoverageFiles = belowTarget.map((f: any) => {
        const parsed: unknown = f.uncovered_lines ? JSON.parse(f.uncovered_lines) : [];
        const uncovered: unknown[] = Array.isArray(parsed) ? parsed : [];
        const entry = {
          path: f.path,
          coverage: f.lines_percentage,
          uncoveredLines: uncovered.length,
          totalLines: f.lines_total
        };
        return includeUncovered ? { ...entry, uncoveredLineList: uncovered } : entry;
      });
      const overall = {
        lines: coverage.lines.percentage,
        statements: coverage.statements.percentage,
        functions: coverage.functions.percentage,
        branches: coverage.branches.percentage
      };
      const meetsTarget = coverage.lines.percentage >= targetCoverage;
      const gap = targetCoverage - coverage.lines.percentage;

      // Generate recommendations
      const recommendations: Array<{ priority: string; message: string }> = [];
      if (!meetsTarget) {
        recommendations.push({
          priority: 'high',
          message: `Increase overall coverage by ${Math.abs(gap).toFixed(1)}% to meet target of ${targetCoverage}%`
        });
      }
      if (untestedCount > 0) {
        recommendations.push({
          priority: 'high',
          message: `Add tests for ${untestedCount} completely untested file(s)`
        });
      }
      const criticalCount = lowCoverageFiles.filter((f: any) => f.coverage < 50).length;
      if (criticalCount > 0) {
        recommendations.push({
          priority: 'medium',
          message: `Improve coverage for ${criticalCount} file(s) with less than 50% coverage`
        });
      }

      return createSuccessResult({
        coverage: overall,
        targetCoverage,
        meetsTarget,
        gap,
        fileAnalysis: {
          total: files.length,
          withGoodCoverage: files.length - belowTarget.length,
          withPoorCoverage: belowTarget.length,
          withNoCoverage: untestedCount
        },
        lowCoverageFiles: lowCoverageFiles.slice(0, 10), // Top 10 worst files
        recommendations,
        message: meetsTarget
          ? `✅ Coverage goal met! (${coverage.lines.percentage.toFixed(1)}% >= ${targetCoverage}%)`
          : `⚠️ Coverage below target (${coverage.lines.percentage.toFixed(1)}% < ${targetCoverage}%)`,
        testResultId: latest.data.id,
        timestamp: new Date(latest.data.timestamp).toISOString()
      });
    } catch (error) {
      return createErrorResult({
        code: 'EXECUTION_ERROR',
        message: `Failed to analyze coverage: ${error instanceof Error ? error.message : 'Unknown error'}`,
        category: 'execution'
      });
    }
  }
});
/** One entry of the `flakyTests` list returned by `detect_flaky_tests`. */
interface FlakyTestReportEntry {
  id: string;
  testName: string;
  suiteName: string;
  failureRate: number;
  totalRuns: number;
  failures: number;
  passes: number;
  recentResults: string[];
  firstSeen: string;
  lastSeen: string;
  errorPatterns: unknown[];
}

/**
 * Tool `detect_flaky_tests`: lists tracked tests whose failure rate lies inside
 * a band that excludes both reliably passing and reliably failing tests.
 *
 * With threshold `t` (0.2 when omitted, clamped to 0..1) the band is
 * `[min(t, 1 - t), max(t, 1 - t)]`. Mirroring the band keeps it non-empty for
 * thresholds above 0.5, which would otherwise produce a lower bound greater
 * than the upper bound and never match anything. Only tests with at least
 * `minRuns` recorded runs (5 when omitted) are considered.
 *
 * Results are ordered by failure rate, grouped by severity and accompanied by
 * recommendations. Database failures return `DATABASE_ERROR`; thrown errors
 * return `EXECUTION_ERROR`.
 */
const detectFlakyTestsTool = createTool<DetectFlakyTestsInput, any>({
  name: 'detect_flaky_tests',
  description: 'Identify tests that fail intermittently',
  category: 'testing-framework',
  inputSchema: {
    type: 'object',
    properties: {
      minRuns: {
        type: 'number',
        description: 'Minimum test runs required for analysis',
        default: 5
      },
      threshold: {
        type: 'number',
        description: 'Failure rate threshold (0-1)',
        default: 0.2
      }
    },
    additionalProperties: false
  } as JSONSchema7,
  async execute(input: DetectFlakyTestsInput, context: RequestContext) {
    try {
      const projectId = context.projectId || 'default';
      // `??` so that explicit zeros ("no minimum", "any failure rate") are honoured.
      const minRuns = input.minRuns ?? 5;
      const threshold = Math.min(1, Math.max(0, input.threshold ?? 0.2));
      const lowerBound = Math.min(threshold, 1 - threshold);
      const upperBound = Math.max(threshold, 1 - threshold);

      // Get flaky tests
      const flakyTests = await context.db.all(
        `SELECT * FROM flaky_tests
        WHERE project_id = ?
        AND total_runs >= ?
        AND failure_rate >= ?
        AND failure_rate <= ?
        ORDER BY failure_rate DESC`,
        [projectId, minRuns, lowerBound, upperBound]
      );
      if (!flakyTests.success) {
        return createErrorResult({
          code: 'DATABASE_ERROR',
          message: 'Failed to fetch flaky tests',
          details: { error: flakyTests.error },
          category: 'system'
        });
      }
      if (flakyTests.data.length === 0) {
        return createSuccessResult({
          flakyTests: [],
          message: '✅ No flaky tests detected!',
          summary: 'All tests show consistent pass/fail behavior'
        });
      }

      // Format flaky test data
      const formattedTests: FlakyTestReportEntry[] = flakyTests.data.map((test: any) => {
        const recentRuns = JSON.parse(test.recent_runs || '[]');
        return {
          id: test.id,
          testName: test.test_name,
          suiteName: test.suite_name,
          failureRate: test.failure_rate,
          totalRuns: test.total_runs,
          failures: test.failures,
          passes: test.passes,
          recentResults: recentRuns.map((r: any) => r.passed ? 'passed' : 'failed'),
          firstSeen: new Date(test.first_seen).toISOString(),
          lastSeen: new Date(test.last_seen).toISOString(),
          errorPatterns: JSON.parse(test.error_patterns || '[]')
        };
      });

      // Group by failure rate severity
      const severity = {
        critical: formattedTests.filter(t => t.failureRate >= 0.5),
        high: formattedTests.filter(t => t.failureRate >= 0.3 && t.failureRate < 0.5),
        medium: formattedTests.filter(t => t.failureRate >= 0.2 && t.failureRate < 0.3),
        low: formattedTests.filter(t => t.failureRate < 0.2)
      };

      // Generate recommendations
      const recommendations: Array<{ priority: string; message: string }> = [];
      if (severity.critical.length > 0) {
        recommendations.push({
          priority: 'critical',
          // Wording matches the `>= 0.5` bucket boundary.
          message: `${severity.critical.length} test(s) fail at least 50% of the time - investigate immediately`
        });
      }
      if (severity.high.length > 0) {
        recommendations.push({
          priority: 'high',
          message: `${severity.high.length} test(s) have high failure rates - check for timing dependencies`
        });
      }
      recommendations.push({
        priority: 'medium',
        message: 'Consider adding retry logic for known flaky tests'
      });
      recommendations.push({
        priority: 'low',
        message: 'Review test isolation and ensure tests don\'t share state'
      });

      // The query orders by failure rate, so the first entry is the flakiest.
      const flakiest = formattedTests[0];
      const averageFailureRate =
        formattedTests.reduce((sum, t) => sum + t.failureRate, 0) / formattedTests.length;
      return createSuccessResult({
        flakyTests: formattedTests,
        totalCount: formattedTests.length,
        severity,
        recommendations,
        message: `⚠️ Found ${formattedTests.length} flaky test(s)`,
        insights: [
          `Most flaky: "${flakiest?.testName}" (${((flakiest?.failureRate ?? 0) * 100).toFixed(1)}% failure rate)`,
          `Average failure rate: ${(averageFailureRate * 100).toFixed(1)}%`
        ]
      });
    } catch (error) {
      return createErrorResult({
        code: 'EXECUTION_ERROR',
        message: `Failed to detect flaky tests: ${error instanceof Error ? error.message : 'Unknown error'}`,
        category: 'execution'
      });
    }
  }
});
/** Patterns watched when the caller supplies none (same list as the schema default). */
const DEFAULT_WATCH_PATTERNS: readonly string[] = ['**/*.test.*', '**/*.spec.*', 'src/**/*'];

/** Characters that mark a pattern (or path segment) as a glob rather than a literal path. */
const WATCH_GLOB_CHARS = /[*?[\]{}()!]/;

/** Glob syntax the built-in matcher does not translate: brackets, braces, extglobs, negation. */
const WATCH_UNSUPPORTED_GLOB_CHARS = /[[\]{}()!]/;

/**
 * Translates a glob made of `*`, `?` and `**` into an anchored regular
 * expression over `/`-separated paths.
 *
 * `**` followed by `/` matches zero or more directories, a trailing `**`
 * matches anything, `*` and `?` never cross a `/`.
 */
function watchGlobToRegExp(glob: string): RegExp {
  let source = '';
  let i = 0;
  while (i < glob.length) {
    const ch = glob.charAt(i);
    if (ch === '*' && glob.charAt(i + 1) === '*') {
      i += 2;
      if (glob.charAt(i) === '/') {
        source += '(?:.*/)?';
        i += 1;
      } else {
        source += '.*';
      }
    } else if (ch === '*') {
      source += '[^/]*';
      i += 1;
    } else if (ch === '?') {
      source += '[^/]';
      i += 1;
    } else {
      source += ch.replace(/[\\^$.+|()[\]{}]/g, '\\$&');
      i += 1;
    }
  }
  return new RegExp(`^${source}$`);
}

/**
 * Splits a watch pattern into the path handed to `fs.watch` and a predicate
 * deciding whether a changed path is relevant.
 *
 * - A pattern without glob characters is watched as-is and every event counts.
 * - Otherwise the watch root is the leading run of literal path segments
 *   (`.` when the pattern starts with a glob) and events are filtered with the
 *   translated glob. Patterns using unsupported glob syntax fall back to
 *   "every change under the root counts" rather than never matching.
 */
function compileWatchPattern(pattern: string): { root: string; matches: (path: string) => boolean } {
  if (!WATCH_GLOB_CHARS.test(pattern)) {
    return { root: pattern, matches: () => true };
  }
  const slashed = process.platform === 'win32' ? pattern.replace(/\\/g, '/') : pattern;
  const glob = slashed.startsWith('./') ? slashed.slice(2) : slashed;
  const segments = glob.split('/');
  const firstGlobIndex = segments.findIndex(segment => WATCH_GLOB_CHARS.test(segment));
  const prefix = segments.slice(0, firstGlobIndex).join('/');
  const root = prefix !== '' ? prefix : glob.startsWith('/') ? '/' : '.';
  if (WATCH_UNSUPPORTED_GLOB_CHARS.test(glob)) {
    return { root, matches: () => true };
  }
  const expression = watchGlobToRegExp(glob);
  return { root, matches: (path: string) => expression.test(path) };
}

/**
 * Starts one recursive `fs.watch` per distinct watch root and calls `onChange`
 * for every event that matches one of the patterns sharing that root.
 *
 * Roots that cannot be watched (for example a directory that does not exist)
 * are logged and skipped so that the remaining patterns still work.
 *
 * @returns the first watcher as `primary` and the rest as `extras`
 * @throws the first watch error when no root could be watched at all
 */
function startPatternWatchers(
  patterns: readonly string[],
  onChange: () => void
): { primary: FSWatcher; extras: FSWatcher[] } {
  const matchersByRoot = new Map<string, Array<(path: string) => boolean>>();
  for (const pattern of patterns) {
    const { root, matches } = compileWatchPattern(pattern);
    const existing = matchersByRoot.get(root);
    if (existing) {
      existing.push(matches);
    } else {
      matchersByRoot.set(root, [matches]);
    }
  }

  const started: FSWatcher[] = [];
  let firstError: unknown;
  for (const [root, matchers] of matchersByRoot) {
    try {
      const watcher = watch(root, { recursive: true }, (_eventType, filename) => {
        // Some platforms do not report a file name; treat that as relevant.
        if (filename === null || filename === undefined) {
          onChange();
          return;
        }
        const relative = String(filename).replace(/\\/g, '/');
        const changedPath =
          root === '.' ? relative : root.endsWith('/') ? `${root}${relative}` : `${root}/${relative}`;
        if (matchers.some(matches => matches(changedPath))) {
          onChange();
        }
      });
      // An FSWatcher without an 'error' listener would crash the process on watch errors.
      watcher.on('error', error => {
        console.error(`Test watcher error for "${root}":`, error);
      });
      started.push(watcher);
    } catch (error) {
      console.error(`Unable to watch "${root}":`, error);
      if (firstError === undefined) {
        firstError = error;
      }
    }
  }

  const primary = started[0];
  if (!primary) {
    throw firstError ?? new Error('No watch patterns supplied');
  }
  return { primary, extras: started.slice(1) };
}

/**
 * Tool `watch_tests`: records a watcher in `test_watchers`, starts file-system
 * watchers for the requested patterns and reruns the tests (debounced) when a
 * matching file changes.
 *
 * Glob patterns are not valid `fs.watch` paths, so each pattern is reduced to
 * its literal directory prefix, which is watched recursively, and events are
 * filtered against the pattern. All watchers belonging to one call are closed
 * together when the primary watcher (the one stored in `activeWatchers`) is
 * closed; closing also cancels a pending debounced rerun.
 *
 * If the watchers cannot be started the database row is marked inactive before
 * the error is reported as `EXECUTION_ERROR`.
 */
const watchTestsTool = createTool<WatchTestsInput, any>({
  name: 'watch_tests',
  description: 'Watch files and automatically rerun tests on changes',
  category: 'testing-framework',
  inputSchema: {
    type: 'object',
    properties: {
      patterns: {
        type: 'array',
        items: { type: 'string' },
        description: 'File patterns to watch',
        default: ['**/*.test.*', '**/*.spec.*', 'src/**/*']
      },
      command: {
        type: 'string',
        description: 'Test command to run'
      },
      autoRerun: {
        type: 'boolean',
        description: 'Automatically rerun tests on file change',
        default: true
      },
      debounceMs: {
        type: 'number',
        description: 'Debounce delay in milliseconds',
        default: 1000
      }
    },
    additionalProperties: false
  } as JSONSchema7,
  async execute(input: WatchTestsInput, context: RequestContext) {
    try {
      const projectId = context.projectId || 'default';
      const watcherId = `watcher-${randomUUID()}`;
      const now = Date.now();

      // Resolve the effective options once so the DB row, the watcher and the reply agree.
      const patterns =
        input.patterns && input.patterns.length > 0 ? input.patterns : [...DEFAULT_WATCH_PATTERNS];
      const command = input.command || 'npm test';
      const autoRerun = input.autoRerun !== false;
      const debounceMs =
        typeof input.debounceMs === 'number' && input.debounceMs >= 0 ? input.debounceMs : 1000;

      // Save watcher configuration
      const result = await context.db.run(
        `INSERT INTO test_watchers
        (id, project_id, patterns, command, active, auto_rerun, debounce_ms, created_at, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
        [
          watcherId,
          projectId,
          JSON.stringify(patterns),
          command,
          true,
          autoRerun,
          debounceMs,
          now,
          now
        ]
      );
      if (!result.success) {
        return createErrorResult({
          code: 'DATABASE_ERROR',
          message: 'Failed to create test watcher',
          details: { error: result.error },
          category: 'system'
        });
      }

      // Set up file watcher
      let debounceTimer: NodeJS.Timeout | null = null;
      const runTests = async () => {
        if (!testRunner) {
          testRunner = new TestRunner();
        }
        try {
          const result = await testRunner.runTests({ watch: false });
          // Update last run
          await context.db.run(
            'UPDATE test_watchers SET last_run_id = ?, updated_at = ? WHERE id = ?',
            [result.id, Date.now(), watcherId]
          );
          console.log(`Tests completed: ${result.summary.passed}/${result.summary.total} passed`);
        } catch (error) {
          console.error('Test run failed:', error);
        }
      };
      const handleFileChange = () => {
        if (debounceTimer) clearTimeout(debounceTimer);
        debounceTimer = setTimeout(() => {
          debounceTimer = null;
          if (autoRerun) {
            console.log('File changed, rerunning tests...');
            // runTests handles its own errors, so the promise can be left unawaited.
            void runTests();
          }
        }, debounceMs);
      };

      // Create watchers; on failure do not leave an "active" row without a watcher.
      let watchers: { primary: FSWatcher; extras: FSWatcher[] };
      try {
        watchers = startPatternWatchers(patterns, handleFileChange);
      } catch (watchError) {
        await context.db.run(
          'UPDATE test_watchers SET active = false, updated_at = ? WHERE id = ?',
          [Date.now(), watcherId]
        );
        throw watchError;
      }
      const { primary, extras } = watchers;
      // stop_test_watcher only closes the stored watcher; cascade from its 'close' event.
      primary.on('close', () => {
        if (debounceTimer) {
          clearTimeout(debounceTimer);
          debounceTimer = null;
        }
        for (const extra of extras) {
          extra.close();
        }
      });

      // Store watcher reference
      activeWatchers.set(watcherId, {
        watcher: primary,
        config: { watcherId, patterns, command }
      });

      // Run initial test if autoRerun is enabled
      if (autoRerun) {
        await runTests();
      }
      return createSuccessResult({
        watcherId,
        patterns,
        command,
        autoRerun,
        debounceMs,
        message: '👁️ Test watcher started',
        instructions: [
          'Watching for file changes...',
          `Tests will ${autoRerun ? 'automatically rerun' : 'not rerun'} on changes`,
          `Use stop_test_watcher with watcherId "${watcherId}" to stop`
        ]
      });
    } catch (error) {
      return createErrorResult({
        code: 'EXECUTION_ERROR',
        message: `Failed to start test watcher: ${error instanceof Error ? error.message : 'Unknown error'}`,
        category: 'execution'
      });
    }
  }
});
/**
 * Tool `stop_test_watcher`: shuts down a watcher started by `watch_tests`.
 *
 * If this process still holds the watcher it is closed and forgotten first;
 * the `test_watchers` row is then flagged inactive. A failed update yields
 * `DATABASE_ERROR`, an update that touched no row yields `RESOURCE_NOT_FOUND`,
 * and any thrown error yields `EXECUTION_ERROR`.
 */
const stopTestWatcherTool = createTool<StopTestWatcherInput, any>({
  name: 'stop_test_watcher',
  description: 'Stop a running test watcher',
  category: 'testing-framework',
  inputSchema: {
    type: 'object',
    properties: {
      watcherId: {
        type: 'string',
        description: 'ID of the watcher to stop'
      }
    },
    required: ['watcherId'],
    additionalProperties: false
  } as JSONSchema7,
  async execute(input: StopTestWatcherInput, context: RequestContext) {
    try {
      const id = input.watcherId;

      // Release the in-process watcher, if there is one.
      const entry = activeWatchers.get(id);
      if (entry) {
        entry.watcher.close();
        activeWatchers.delete(id);
      }

      // Flag the persisted watcher as stopped.
      const update = await context.db.run(
        'UPDATE test_watchers SET active = false, updated_at = ? WHERE id = ?',
        [Date.now(), id]
      );

      if (!update.success) {
        return createErrorResult({
          code: 'DATABASE_ERROR',
          message: 'Failed to update watcher status',
          details: { error: update.error },
          category: 'system'
        });
      }

      // No row changed means the id is unknown to the database.
      const nothingUpdated = update.data?.changes === 0;
      if (nothingUpdated) {
        return createErrorResult({
          code: 'RESOURCE_NOT_FOUND',
          message: `Watcher ${id} not found`,
          category: 'validation'
        });
      }

      return createSuccessResult({
        watcherId: id,
        stopped: true,
        message: '✅ Test watcher stopped'
      });
    } catch (error) {
      const reason = error instanceof Error ? error.message : 'Unknown error';
      return createErrorResult({
        code: 'EXECUTION_ERROR',
        message: `Failed to stop test watcher: ${reason}`,
        category: 'execution'
      });
    }
  }
});
/**
 * Tool `set_test_baseline`: marks one stored test result as the project's
 * baseline for later comparisons.
 *
 * The target is `resultId` when given, otherwise the project's newest result.
 * Any existing baseline row for the project is deleted and a new one inserted.
 * The target result is then tagged with `'baseline'` plus any caller-supplied
 * `tags` (tags already present are not duplicated).
 *
 * Returns `RESOURCE_NOT_FOUND` when there is no result to use,
 * `DATABASE_ERROR` when the baseline row cannot be inserted and
 * `EXECUTION_ERROR` for thrown errors.
 */
const setTestBaselineTool = createTool<SetTestBaselineInput, any>({
  name: 'set_test_baseline',
  description: 'Set a test result as the baseline for comparisons',
  category: 'testing-framework',
  inputSchema: {
    type: 'object',
    properties: {
      resultId: {
        type: 'string',
        description: 'Test result ID to set as baseline (defaults to latest)'
      },
      tags: {
        type: 'array',
        items: { type: 'string' },
        description: 'Additional tags for the baseline',
        default: ['baseline']
      }
    },
    additionalProperties: false
  } as JSONSchema7,
  async execute(input: SetTestBaselineInput, context: RequestContext) {
    try {
      const projectId = context.projectId || 'default';
      let targetId = input.resultId;

      // Use latest result if no ID specified
      if (!targetId) {
        const latest = await context.db.get(
          'SELECT id FROM test_results WHERE project_id = ? ORDER BY timestamp DESC LIMIT 1',
          [projectId]
        );
        if (!latest.success || !latest.data) {
          return createErrorResult({
            code: 'RESOURCE_NOT_FOUND',
            message: 'No test results available to set as baseline',
            category: 'validation'
          });
        }
        targetId = latest.data.id;
      }

      // Verify result exists
      const testResult = await context.db.get(
        'SELECT * FROM test_results WHERE id = ? AND project_id = ?',
        [targetId, projectId]
      );
      if (!testResult.success || !testResult.data) {
        return createErrorResult({
          code: 'RESOURCE_NOT_FOUND',
          message: `Test result ${targetId} not found`,
          category: 'validation'
        });
      }

      // Delete existing baseline for project
      await context.db.run(
        'DELETE FROM test_baselines WHERE project_id = ?',
        [projectId]
      );

      // Create new baseline
      const baselineId = `baseline-${randomUUID()}`;
      const result = await context.db.run(
        `INSERT INTO test_baselines
        (id, project_id, result_id, name, created_at)
        VALUES (?, ?, ?, ?, ?)`,
        [
          baselineId,
          projectId,
          targetId,
          'baseline',
          Date.now()
        ]
      );
      if (!result.success) {
        return createErrorResult({
          code: 'DATABASE_ERROR',
          message: 'Failed to set baseline',
          details: { error: result.error },
          category: 'system'
        });
      }

      // Add the baseline tag, and any caller-supplied tags, to the test result.
      const storedTags: unknown = JSON.parse(testResult.data.tags || '[]');
      const existingTags: unknown[] = Array.isArray(storedTags) ? storedTags : [];
      const mergedTags = [...existingTags];
      for (const tag of ['baseline', ...(input.tags ?? [])]) {
        if (!mergedTags.includes(tag)) {
          mergedTags.push(tag);
        }
      }
      // Only write when a tag was actually added.
      if (mergedTags.length !== existingTags.length) {
        await context.db.run(
          'UPDATE test_results SET tags = ? WHERE id = ?',
          [JSON.stringify(mergedTags), targetId]
        );
      }

      // Coverage JSON without a numeric `lines.percentage` is treated as "no coverage data"
      // rather than failing the call after the baseline has already been written.
      const coverage = testResult.data.coverage_data ?
        JSON.parse(testResult.data.coverage_data) : null;
      const rawPercentage: unknown = coverage?.lines?.percentage;
      const linesPercentage: number | null = typeof rawPercentage === 'number' ? rawPercentage : null;

      return createSuccessResult({
        baseline: {
          id: baselineId,
          resultId: targetId,
          timestamp: new Date(testResult.data.timestamp).toISOString(),
          summary: {
            total: testResult.data.summary_total,
            passed: testResult.data.summary_passed,
            failed: testResult.data.summary_failed,
            successRate: testResult.data.summary_success_rate
          },
          coverage: linesPercentage
        },
        message: '✅ Test baseline set',
        details: [
          `Success Rate: ${testResult.data.summary_success_rate.toFixed(1)}%`,
          linesPercentage !== null ? `Coverage: ${linesPercentage.toFixed(1)}%` : 'No coverage data',
          'Future test runs will be compared against this baseline'
        ]
      });
    } catch (error) {
      return createErrorResult({
        code: 'EXECUTION_ERROR',
        message: `Failed to set baseline: ${error instanceof Error ? error.message : 'Unknown error'}`,
        category: 'execution'
      });
    }
  }
});
/**
 * Builds the registration record for the testing-framework module.
 *
 * @returns the module name together with its seven tools, in the order they
 *   are defined in this file.
 */
export async function setupTestingFrameworkTools(): Promise<ToolRegistration> {
  const registration: ToolRegistration = {
    module: 'testing-framework',
    tools: [
      runTestsTool,
      trackTestResultsTool,
      analyzeTestCoverageTool,
      detectFlakyTestsTool,
      watchTestsTool,
      stopTestWatcherTool,
      setTestBaselineTool
    ]
  };
  return registration;
}