@boundless-oss/atlas
Version:
Atlas - MCP Server for comprehensive startup project management
1,017 lines (927 loc) • 33.5 kB
text/typescript
import { JSONSchema7 } from 'json-schema';
import { randomUUID } from 'crypto';
import { createTool, createSuccessResult, createErrorResult } from '../../core/tool-framework.js';
import { ToolRegistration, RequestContext } from '../../core/types.js';
/**
* Performance Monitoring Tools - 12-Factor MCP Implementation
*
* Implements Factor 2: Deterministic Execution with structured outputs
* Implements Factor 3: Stateless Processes with RequestContext
* Implements Factor 4: Structured Outputs for LLM consumption
*/
// Input type interfaces
interface GetSystemMetricsInput {
timeRange?: '1h' | '24h' | '7d' | '30d';
includeDetails?: boolean;
}
interface GetToolMetricsInput {
toolName: string;
timeRange?: '1h' | '24h' | '7d' | '30d';
includeQuality?: boolean;
}
interface GeneratePerformanceAlertsInput {
alertTypes?: Array<'performance' | 'errors' | 'quality' | 'costs' | 'security'>;
severity?: 'low' | 'medium' | 'high' | 'critical';
}
interface ExportPerformanceMetricsInput {
format?: 'csv' | 'json';
timeRange?: '1h' | '24h' | '7d' | '30d';
includeQuality?: boolean;
filePath?: string;
}
interface RecordQualityEvaluationInput {
toolName: string;
quality: number;
feedback?: string;
executionId?: string;
}
interface GetQualityMetricsInput {
toolName?: string;
timeRange?: '1h' | '24h' | '7d' | '30d';
minRatings?: number;
}
/**
* Get system metrics
*/
const getSystemMetricsTool = createTool<GetSystemMetricsInput, any>({
name: 'get_system_metrics',
description: 'Get comprehensive system performance metrics including success rates, response times, and error statistics. Use this to monitor overall Atlas health and identify performance bottlenecks.',
category: 'performance-monitoring',
readOnly: true,
inputSchema: {
type: 'object',
properties: {
timeRange: {
type: 'string',
enum: ['1h', '24h', '7d', '30d'],
default: '24h',
description: 'Time range for metrics aggregation'
},
includeDetails: {
type: 'boolean',
default: false,
description: 'Include detailed breakdown by tool and operation'
}
},
required: [],
additionalProperties: false
} as JSONSchema7,
async execute(input: GetSystemMetricsInput, context: RequestContext) {
try {
const timeRange = input.timeRange || '24h';
const includeDetails = input.includeDetails || false;
// Calculate time bounds
const now = Date.now();
const timeRangeMs = {
'1h': 60 * 60 * 1000,
'24h': 24 * 60 * 60 * 1000,
'7d': 7 * 24 * 60 * 60 * 1000,
'30d': 30 * 24 * 60 * 60 * 1000
};
const startTime = now - timeRangeMs[timeRange];
// Get overall metrics
const overallResult = await context.db.get(
`SELECT
COUNT(*) as total_executions,
COUNT(CASE WHEN success = 1 THEN 1 END) as successful_executions,
AVG(execution_time) as avg_response_time,
MAX(execution_time) as max_response_time,
COUNT(CASE WHEN success = 0 THEN 1 END) as error_count,
SUM(token_count) as total_tokens,
SUM(cost) as total_cost
FROM performance_metrics_v2
WHERE timestamp >= ?`,
[startTime]
);
if (!overallResult.success || !overallResult.data) {
return createErrorResult({
code: 'DATABASE_ERROR',
message: 'Failed to retrieve system metrics',
category: 'system'
});
}
const metrics = overallResult.data;
const successRate = metrics.total_executions > 0
? metrics.successful_executions / metrics.total_executions
: 0;
// Get top tools by usage
const topToolsResult = await context.db.query(
`SELECT
tool_name,
COUNT(*) as executions,
AVG(execution_time) as avg_time,
COUNT(CASE WHEN success = 1 THEN 1 END) as successes
FROM performance_metrics_v2
WHERE timestamp >= ?
GROUP BY tool_name
ORDER BY executions DESC
LIMIT 10`,
[startTime]
);
const topTools = (topToolsResult.data || []).map((tool: any) => ({
name: tool.tool_name,
executions: tool.executions,
avgResponseTime: Math.round(tool.avg_time || 0),
successRate: tool.executions > 0 ? tool.successes / tool.executions : 0
}));
let toolBreakdown = {};
if (includeDetails) {
const detailsResult = await context.db.query(
`SELECT
tool_name,
COUNT(*) as executions,
COUNT(CASE WHEN success = 1 THEN 1 END) as successes,
AVG(execution_time) as avg_time,
MIN(execution_time) as min_time,
MAX(execution_time) as max_time,
COUNT(DISTINCT user_id) as unique_users,
COUNT(DISTINCT session_id) as unique_sessions
FROM performance_metrics_v2
WHERE timestamp >= ?
GROUP BY tool_name`,
[startTime]
);
toolBreakdown = (detailsResult.data || []).reduce((acc: any, tool: any) => {
acc[tool.tool_name] = {
executions: tool.executions,
successRate: tool.executions > 0 ? tool.successes / tool.executions : 0,
averageResponseTime: Math.round(tool.avg_time || 0),
minResponseTime: Math.round(tool.min_time || 0),
maxResponseTime: Math.round(tool.max_time || 0),
uniqueUsers: tool.unique_users,
uniqueSessions: tool.unique_sessions
};
return acc;
}, {});
}
const result = {
timeRange,
timeRangeMs: timeRangeMs[timeRange],
startTime: new Date(startTime).toISOString(),
endTime: new Date(now).toISOString(),
totalExecutions: metrics.total_executions || 0,
successRate: Math.round(successRate * 1000) / 10,
averageResponseTime: Math.round(metrics.avg_response_time || 0),
maxResponseTime: Math.round(metrics.max_response_time || 0),
errorCount: metrics.error_count || 0,
totalTokensUsed: metrics.total_tokens || 0,
totalCost: metrics.total_cost || 0,
topTools,
...(includeDetails && { toolBreakdown })
};
return createSuccessResult({
metrics: result,
summary: `System health: ${result.successRate}% success rate with ${result.totalExecutions} executions in the last ${timeRange}`
});
} catch (error) {
return createErrorResult({
code: 'EXECUTION_ERROR',
message: `Failed to get system metrics: ${error instanceof Error ? error.message : 'Unknown error'}`,
category: 'execution'
});
}
}
});
/**
* Get tool metrics
*/
const getToolMetricsTool = createTool<GetToolMetricsInput, any>({
name: 'get_tool_metrics',
description: 'Get detailed performance metrics for a specific tool including execution times, success rates, and quality scores. Use this to analyze individual tool performance.',
category: 'performance-monitoring',
readOnly: true,
inputSchema: {
type: 'object',
properties: {
toolName: {
type: 'string',
description: 'Name of the tool to analyze',
minLength: 1
},
timeRange: {
type: 'string',
enum: ['1h', '24h', '7d', '30d'],
default: '24h',
description: 'Time range for metrics aggregation'
},
includeQuality: {
type: 'boolean',
default: true,
description: 'Include quality evaluation metrics'
}
},
required: ['toolName'],
additionalProperties: false
} as JSONSchema7,
async execute(input: GetToolMetricsInput, context: RequestContext) {
try {
const { toolName, timeRange = '24h', includeQuality = true } = input;
// Calculate time bounds
const now = Date.now();
const timeRangeMs = {
'1h': 60 * 60 * 1000,
'24h': 24 * 60 * 60 * 1000,
'7d': 7 * 24 * 60 * 60 * 1000,
'30d': 30 * 24 * 60 * 60 * 1000
};
const startTime = now - timeRangeMs[timeRange];
// Get tool metrics
const metricsResult = await context.db.get(
`SELECT
COUNT(*) as executions,
COUNT(CASE WHEN success = 1 THEN 1 END) as successes,
AVG(execution_time) as avg_time,
MIN(execution_time) as min_time,
MAX(execution_time) as max_time,
AVG(CASE WHEN success = 1 THEN execution_time END) as avg_success_time,
COUNT(DISTINCT error_type) as unique_errors,
SUM(token_count) as total_tokens,
SUM(cost) as total_cost
FROM performance_metrics_v2
WHERE tool_name = ? AND timestamp >= ?`,
[toolName, startTime]
);
if (!metricsResult.success || !metricsResult.data) {
return createErrorResult({
code: 'DATABASE_ERROR',
message: 'Failed to retrieve tool metrics',
category: 'system'
});
}
const metrics = metricsResult.data;
if (metrics.executions === 0) {
return createSuccessResult({
toolName,
timeRange,
metrics: {
executions: 0,
successRate: 0,
averageResponseTime: 0,
errorCount: 0
},
message: `No executions found for tool "${toolName}" in the last ${timeRange}`
});
}
// Get error breakdown if there are errors
let errorBreakdown = {};
if (metrics.unique_errors > 0) {
const errorsResult = await context.db.query(
`SELECT
error_type,
COUNT(*) as count
FROM performance_metrics_v2
WHERE tool_name = ? AND timestamp >= ? AND success = 0
GROUP BY error_type
ORDER BY count DESC`,
[toolName, startTime]
);
errorBreakdown = (errorsResult.data || []).reduce((acc: any, error: any) => {
acc[error.error_type || 'unknown'] = error.count;
return acc;
}, {});
}
// Get quality metrics if requested
let qualityMetrics = null;
if (includeQuality) {
const qualityResult = await context.db.get(
`SELECT
COUNT(*) as evaluation_count,
AVG(output_quality) as avg_quality,
AVG(appropriateness_score) as avg_appropriateness,
AVG(completeness_score) as avg_completeness,
AVG(accuracy_score) as avg_accuracy,
MIN(output_quality) as min_quality,
MAX(output_quality) as max_quality
FROM performance_quality_evaluations
WHERE tool_name = ? AND timestamp >= ?`,
[toolName, startTime]
);
if (qualityResult.success && qualityResult.data && qualityResult.data.evaluation_count > 0) {
qualityMetrics = {
evaluationCount: qualityResult.data.evaluation_count,
averageQuality: Math.round(qualityResult.data.avg_quality * 10) / 10,
averageAppropriateness: Math.round(qualityResult.data.avg_appropriateness * 10) / 10,
averageCompleteness: Math.round(qualityResult.data.avg_completeness * 10) / 10,
averageAccuracy: Math.round(qualityResult.data.avg_accuracy * 10) / 10,
qualityRange: {
min: qualityResult.data.min_quality,
max: qualityResult.data.max_quality
}
};
}
}
const successRate = metrics.successes / metrics.executions;
const errorCount = metrics.executions - metrics.successes;
const result = {
toolName,
timeRange,
metrics: {
executions: metrics.executions,
successRate: Math.round(successRate * 1000) / 10,
averageResponseTime: Math.round(metrics.avg_time || 0),
minResponseTime: Math.round(metrics.min_time || 0),
maxResponseTime: Math.round(metrics.max_time || 0),
averageSuccessResponseTime: Math.round(metrics.avg_success_time || 0),
errorCount,
errorTypes: errorBreakdown,
totalTokens: metrics.total_tokens || 0,
totalCost: metrics.total_cost || 0,
costPerExecution: metrics.total_cost ? metrics.total_cost / metrics.executions : 0
},
...(qualityMetrics && { qualityMetrics })
};
return createSuccessResult(result);
} catch (error) {
return createErrorResult({
code: 'EXECUTION_ERROR',
message: `Failed to get tool metrics: ${error instanceof Error ? error.message : 'Unknown error'}`,
category: 'execution'
});
}
}
});
/**
* Generate performance alerts
*/
const generatePerformanceAlertsTool = createTool<GeneratePerformanceAlertsInput, any>({
name: 'generate_performance_alerts',
description: 'Generate alerts for performance issues including slow tools, high error rates, and quality degradation. Use this proactively to identify problems.',
category: 'performance-monitoring',
readOnly: true,
inputSchema: {
type: 'object',
properties: {
alertTypes: {
type: 'array',
items: {
type: 'string',
enum: ['performance', 'errors', 'quality', 'costs', 'security']
},
default: ['performance', 'errors', 'quality'],
description: 'Types of alerts to generate'
},
severity: {
type: 'string',
enum: ['low', 'medium', 'high', 'critical'],
default: 'medium',
description: 'Minimum severity level for alerts'
}
},
required: [],
additionalProperties: false
} as JSONSchema7,
async execute(input: GeneratePerformanceAlertsInput, context: RequestContext) {
try {
const alertTypes = input.alertTypes || ['performance', 'errors', 'quality'];
const minSeverity = input.severity || 'medium';
const alerts: any[] = [];
const now = Date.now();
const last24h = now - 24 * 60 * 60 * 1000;
// Performance alerts
if (alertTypes.includes('performance')) {
const slowToolsResult = await context.db.query(
`SELECT
tool_name,
AVG(execution_time) as avg_time,
COUNT(*) as executions
FROM performance_metrics_v2
WHERE timestamp >= ?
GROUP BY tool_name
HAVING avg_time > 3000 AND executions >= 10
ORDER BY avg_time DESC`,
[last24h]
);
for (const tool of slowToolsResult.data || []) {
alerts.push({
id: randomUUID(),
type: 'performance',
severity: tool.avg_time > 5000 ? 'high' : 'medium',
title: `Slow tool performance: ${tool.tool_name}`,
message: `Average response time is ${Math.round(tool.avg_time)}ms (above 3s threshold)`,
suggestion: `Consider optimizing ${tool.tool_name} implementation or reviewing its usage patterns`,
toolName: tool.tool_name,
metricValue: tool.avg_time,
thresholdValue: 3000
});
}
}
// Error alerts
if (alertTypes.includes('errors')) {
const errorProneToolsResult = await context.db.query(
`SELECT
tool_name,
COUNT(*) as total,
COUNT(CASE WHEN success = 0 THEN 1 END) as failures,
CAST(COUNT(CASE WHEN success = 0 THEN 1 END) AS REAL) / COUNT(*) as error_rate
FROM performance_metrics_v2
WHERE timestamp >= ?
GROUP BY tool_name
HAVING error_rate > 0.2 AND total >= 10
ORDER BY error_rate DESC`,
[last24h]
);
for (const tool of errorProneToolsResult.data || []) {
const errorRate = Math.round(tool.error_rate * 100);
alerts.push({
id: randomUUID(),
type: 'errors',
severity: errorRate > 50 ? 'critical' : errorRate > 30 ? 'high' : 'medium',
title: `High error rate: ${tool.tool_name}`,
message: `${errorRate}% error rate (${tool.failures}/${tool.total} executions)`,
suggestion: `Investigate recent failures and consider adding better error handling`,
toolName: tool.tool_name,
metricValue: tool.error_rate,
thresholdValue: 0.2
});
}
}
// Quality alerts
if (alertTypes.includes('quality')) {
const lowQualityResult = await context.db.query(
`SELECT
tool_name,
AVG(output_quality) as avg_quality,
COUNT(*) as evaluations
FROM performance_quality_evaluations
WHERE timestamp >= ?
GROUP BY tool_name
HAVING avg_quality < 6 AND evaluations >= 3
ORDER BY avg_quality ASC`,
[last24h]
);
for (const tool of lowQualityResult.data || []) {
alerts.push({
id: randomUUID(),
type: 'quality',
severity: tool.avg_quality < 4 ? 'high' : 'medium',
title: `Low quality scores: ${tool.tool_name}`,
message: `Average quality rating is ${Math.round(tool.avg_quality * 10) / 10}/10`,
suggestion: `Review recent outputs and consider improving prompts or logic`,
toolName: tool.tool_name,
metricValue: tool.avg_quality,
thresholdValue: 6
});
}
}
// Cost alerts
if (alertTypes.includes('costs')) {
const costResult = await context.db.get(
`SELECT
SUM(cost) as total_cost,
AVG(cost) as avg_cost,
COUNT(*) as executions
FROM performance_metrics_v2
WHERE timestamp >= ? AND cost > 0`,
[last24h]
);
if (costResult.success && costResult.data && costResult.data.total_cost > 100) {
alerts.push({
id: randomUUID(),
type: 'costs',
severity: costResult.data.total_cost > 500 ? 'high' : 'medium',
title: 'High usage costs',
message: `Total cost in last 24h: $${costResult.data.total_cost.toFixed(2)}`,
suggestion: 'Review usage patterns and consider optimizing expensive operations',
metricValue: costResult.data.total_cost,
thresholdValue: 100
});
}
}
// Filter by severity
const severityLevels = { low: 1, medium: 2, high: 3, critical: 4 };
const minLevel = severityLevels[minSeverity];
const filteredAlerts = alerts.filter(alert =>
severityLevels[alert.severity as keyof typeof severityLevels] >= minLevel
);
// Store alerts in database
for (const alert of filteredAlerts) {
await context.db.run(
`INSERT INTO performance_alerts
(id, type, severity, title, message, suggestion, tool_name,
metric_value, threshold_value, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
[
alert.id,
alert.type,
alert.severity,
alert.title,
alert.message,
alert.suggestion || null,
alert.toolName || null,
alert.metricValue || null,
alert.thresholdValue || null,
now
]
);
}
return createSuccessResult({
alertCount: filteredAlerts.length,
alerts: filteredAlerts,
generatedAt: new Date(now).toISOString(),
timeRange: '24h',
filters: {
alertTypes,
minSeverity
}
});
} catch (error) {
return createErrorResult({
code: 'EXECUTION_ERROR',
message: `Failed to generate alerts: ${error instanceof Error ? error.message : 'Unknown error'}`,
category: 'execution'
});
}
}
});
/**
* Export performance metrics
*/
const exportPerformanceMetricsTool = createTool<ExportPerformanceMetricsInput, any>({
name: 'export_performance_metrics',
description: 'Export performance metrics to CSV or JSON format for external analysis. Use this to create reports or integrate with other monitoring systems.',
category: 'performance-monitoring',
readOnly: true,
inputSchema: {
type: 'object',
properties: {
format: {
type: 'string',
enum: ['csv', 'json'],
default: 'csv',
description: 'Export format'
},
timeRange: {
type: 'string',
enum: ['1h', '24h', '7d', '30d'],
default: '24h',
description: 'Time range for data export'
},
includeQuality: {
type: 'boolean',
default: true,
description: 'Include quality evaluation data'
},
filePath: {
type: 'string',
description: 'Optional file path to save export (defaults to .atlas/exports/)'
}
},
required: [],
additionalProperties: false
} as JSONSchema7,
async execute(input: ExportPerformanceMetricsInput, context: RequestContext) {
try {
const { format = 'csv', timeRange = '24h', includeQuality = true } = input;
const exportId = randomUUID();
const now = Date.now();
// Calculate time bounds
const timeRangeMs = {
'1h': 60 * 60 * 1000,
'24h': 24 * 60 * 60 * 1000,
'7d': 7 * 24 * 60 * 60 * 1000,
'30d': 30 * 24 * 60 * 60 * 1000
};
const startTime = now - timeRangeMs[timeRange];
// Get metrics data
const metricsResult = await context.db.query(
`SELECT * FROM performance_metrics_v2
WHERE timestamp >= ?
ORDER BY timestamp DESC`,
[startTime]
);
let exportData = '';
const fileName = `performance-metrics-${new Date().toISOString().split('T')[0]}-${timeRange}.${format}`;
const filePath = input.filePath || `.atlas/exports/${fileName}`;
if (format === 'csv') {
// Generate CSV
const headers = [
'timestamp', 'tool_name', 'execution_time', 'success', 'error_type',
'token_count', 'cost', 'user_id', 'session_id', 'request_id'
];
const rows = [headers.join(',')];
for (const metric of metricsResult.data || []) {
const row = [
new Date(metric.timestamp).toISOString(),
metric.tool_name,
metric.execution_time,
metric.success ? 'true' : 'false',
metric.error_type || '',
metric.token_count || '',
metric.cost || '',
metric.user_id || '',
metric.session_id,
metric.request_id || ''
];
rows.push(row.map(v => `"${v}"`).join(','));
}
exportData = rows.join('\n');
} else {
// Generate JSON
const systemMetrics = await getSystemMetricsTool.execute({ timeRange, includeDetails: true }, context);
const qualityData = includeQuality ? await context.db.query(
`SELECT * FROM performance_quality_evaluations
WHERE timestamp >= ?
ORDER BY timestamp DESC`,
[startTime]
) : null;
const jsonData = {
exportMetadata: {
exportId,
generatedAt: new Date(now).toISOString(),
timeRange,
startTime: new Date(startTime).toISOString(),
endTime: new Date(now).toISOString(),
includeQuality
},
systemMetrics: systemMetrics.success ? systemMetrics.data : null,
metrics: metricsResult.data || [],
...(qualityData && { qualityEvaluations: qualityData.data || [] })
};
exportData = JSON.stringify(jsonData, null, 2);
}
// Store export record
await context.db.run(
`INSERT INTO performance_exports
(id, format, time_range, include_quality, file_path, export_data, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?)`,
[exportId, format, timeRange, includeQuality ? 1 : 0, filePath, exportData, now]
);
return createSuccessResult({
exportId,
format,
timeRange,
filePath,
fileSize: Buffer.byteLength(exportData, 'utf8'),
recordCount: metricsResult.data?.length || 0,
generatedAt: new Date(now).toISOString(),
message: `Performance metrics exported successfully to ${filePath}`
});
} catch (error) {
return createErrorResult({
code: 'EXECUTION_ERROR',
message: `Failed to export metrics: ${error instanceof Error ? error.message : 'Unknown error'}`,
category: 'execution'
});
}
}
});
/**
* Record quality evaluation
*/
const recordQualityEvaluationTool = createTool<RecordQualityEvaluationInput, any>({
name: 'record_quality_evaluation',
description: 'Record quality evaluation feedback for a tool execution. Use this to rate outputs and improve recommendation accuracy.',
category: 'performance-monitoring',
inputSchema: {
type: 'object',
properties: {
toolName: {
type: 'string',
description: 'Name of the tool being evaluated',
minLength: 1
},
quality: {
type: 'number',
minimum: 1,
maximum: 10,
description: 'Quality rating from 1 (poor) to 10 (excellent)'
},
feedback: {
type: 'string',
description: 'Optional detailed feedback about the output quality'
},
executionId: {
type: 'string',
description: 'Optional execution ID to link evaluation to specific run'
}
},
required: ['toolName', 'quality'],
additionalProperties: false
} as JSONSchema7,
async execute(input: RecordQualityEvaluationInput, context: RequestContext) {
try {
const { toolName, quality, feedback, executionId } = input;
const evaluationId = randomUUID();
const now = Date.now();
// Validate quality is integer
const qualityInt = Math.round(quality);
if (qualityInt < 1 || qualityInt > 10) {
return createErrorResult({
code: 'INVALID_INPUT',
message: 'Quality rating must be between 1 and 10',
category: 'validation'
});
}
// Use quality as proxy for all scores in simple evaluation
const result = await context.db.run(
`INSERT INTO performance_quality_evaluations
(id, tool_name, execution_id, output_quality, appropriateness_score,
completeness_score, accuracy_score, comments, evaluation_criteria,
timestamp, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
[
evaluationId,
toolName,
executionId || null,
qualityInt,
qualityInt, // Use quality as proxy
qualityInt,
qualityInt,
feedback || '',
JSON.stringify(['user_feedback']),
now,
now
]
);
if (!result.success) {
return createErrorResult({
code: 'DATABASE_ERROR',
message: 'Failed to record quality evaluation',
details: { error: result.error },
category: 'system'
});
}
return createSuccessResult({
evaluationId,
toolName,
quality: qualityInt,
feedback: feedback || null,
executionId: executionId || null,
recordedAt: new Date(now).toISOString(),
message: `Quality evaluation recorded successfully for ${toolName}`
});
} catch (error) {
return createErrorResult({
code: 'EXECUTION_ERROR',
message: `Failed to record evaluation: ${error instanceof Error ? error.message : 'Unknown error'}`,
category: 'execution'
});
}
}
});
/**
* Get quality metrics
*/
const getQualityMetricsTool = createTool<GetQualityMetricsInput, any>({
name: 'get_quality_metrics',
description: 'Get quality evaluation metrics for tools including average ratings, feedback trends, and improvement recommendations.',
category: 'performance-monitoring',
readOnly: true,
inputSchema: {
type: 'object',
properties: {
toolName: {
type: 'string',
description: 'Specific tool to analyze (optional - defaults to all tools)'
},
timeRange: {
type: 'string',
enum: ['1h', '24h', '7d', '30d'],
default: '7d',
description: 'Time range for quality analysis'
},
minRatings: {
type: 'number',
minimum: 1,
default: 3,
description: 'Minimum number of ratings required for analysis'
}
},
required: [],
additionalProperties: false
} as JSONSchema7,
async execute(input: GetQualityMetricsInput, context: RequestContext) {
try {
const { toolName, timeRange = '7d', minRatings = 3 } = input;
// Calculate time bounds
const now = Date.now();
const timeRangeMs = {
'1h': 60 * 60 * 1000,
'24h': 24 * 60 * 60 * 1000,
'7d': 7 * 24 * 60 * 60 * 1000,
'30d': 30 * 24 * 60 * 60 * 1000
};
const startTime = now - timeRangeMs[timeRange];
let sql = `
SELECT
tool_name,
COUNT(*) as evaluation_count,
AVG(output_quality) as avg_quality,
AVG(appropriateness_score) as avg_appropriateness,
AVG(completeness_score) as avg_completeness,
AVG(accuracy_score) as avg_accuracy,
MIN(output_quality) as min_quality,
MAX(output_quality) as max_quality
FROM performance_quality_evaluations
WHERE timestamp >= ?`;
const params: any[] = [startTime];
if (toolName) {
sql += ' AND tool_name = ?';
params.push(toolName);
}
sql += ' GROUP BY tool_name HAVING evaluation_count >= ?';
params.push(minRatings);
const result = await context.db.query(sql, params);
if (!result.success) {
return createErrorResult({
code: 'DATABASE_ERROR',
message: 'Failed to retrieve quality metrics',
category: 'system'
});
}
const tools = result.data || [];
if (tools.length === 0) {
return createSuccessResult({
timeRange,
minRatings,
toolName: toolName || null,
message: `No quality data available for the specified criteria`,
metrics: {}
});
}
// Calculate overall statistics
const totalEvaluations = tools.reduce((sum: number, tool: any) => sum + tool.evaluation_count, 0);
const avgQuality = tools.reduce((sum: number, tool: any) =>
sum + tool.avg_quality * tool.evaluation_count, 0) / totalEvaluations;
// Get recent feedback
const feedbackResult = await context.db.query(
`SELECT
tool_name,
output_quality,
comments,
timestamp
FROM performance_quality_evaluations
WHERE timestamp >= ? AND comments != ''
${toolName ? 'AND tool_name = ?' : ''}
ORDER BY timestamp DESC
LIMIT 10`,
toolName ? [startTime, toolName] : [startTime]
);
const recentFeedback = (feedbackResult.data || []).map((item: any) => ({
toolName: item.tool_name,
quality: item.output_quality,
feedback: item.comments,
timestamp: new Date(item.timestamp).toISOString()
}));
// Format tool metrics
const toolMetrics = tools.reduce((acc: any, tool: any) => {
// Determine trend
let trend = 'stable';
if (tool.evaluation_count >= 5) {
// Simple trend: if average is closer to max, it's improving
const range = tool.max_quality - tool.min_quality;
if (range > 2) {
const position = (tool.avg_quality - tool.min_quality) / range;
trend = position > 0.7 ? 'improving' : position < 0.3 ? 'declining' : 'stable';
}
}
acc[tool.tool_name] = {
evaluationCount: tool.evaluation_count,
averageQuality: Math.round(tool.avg_quality * 10) / 10,
averageAppropriateness: Math.round(tool.avg_appropriateness * 10) / 10,
averageCompleteness: Math.round(tool.avg_completeness * 10) / 10,
averageAccuracy: Math.round(tool.avg_accuracy * 10) / 10,
qualityRange: {
min: tool.min_quality,
max: tool.max_quality
},
trend
};
return acc;
}, {});
return createSuccessResult({
timeRange,
minRatings,
summary: {
toolsEvaluated: tools.length,
totalEvaluations,
averageQuality: Math.round(avgQuality * 10) / 10
},
metrics: toolMetrics,
recentFeedback
});
} catch (error) {
return createErrorResult({
code: 'EXECUTION_ERROR',
message: `Failed to get quality metrics: ${error instanceof Error ? error.message : 'Unknown error'}`,
category: 'execution'
});
}
}
});
/**
* Setup performance monitoring tools
*/
export async function setupPerformanceMonitoringTools(): Promise<ToolRegistration> {
return {
module: 'performance-monitoring',
tools: [
getSystemMetricsTool,
getToolMetricsTool,
generatePerformanceAlertsTool,
exportPerformanceMetricsTool,
recordQualityEvaluationTool,
getQualityMetricsTool
]
};
}