claude-expert-workflow-mcp
Version:
Production-ready MCP server for AI-powered product development consultation through specialized expert roles. Enterprise-grade with memory management, monitoring, and Claude Code integration.
583 lines • 22.1 kB
JavaScript
import Anthropic from '@anthropic-ai/sdk';
import { config } from '../config/environment';
import { structuredLogger } from '../monitoring/structuredLogger';
import { PerformanceMonitor, ResourcePool } from '../performance';
import { ErrorRecoverySystem } from '../resilience/advancedErrorHandling';
/**
 * Priority levels for queued requests.
 *
 * Every level is registered in both directions: name -> number
 * (`RequestPriority.HIGH === 3`) and number -> name
 * (`RequestPriority[3] === "HIGH"`).
 */
export var RequestPriority;
(function (priorityEnum) {
    const levels = [
        ['LOW', 1],
        ['NORMAL', 2],
        ['HIGH', 3],
        ['CRITICAL', 4]
    ];
    for (const [label, rank] of levels) {
        // Forward mapping first, then the reverse lookup entry.
        priorityEnum[label] = rank;
        priorityEnum[rank] = label;
    }
})(RequestPriority || (RequestPriority = {}));
// Length of the sliding window used for request and token accounting.
const RATE_LIMIT_WINDOW_MS = 60000;
// Length of the short window used for burst accounting.
const BURST_WINDOW_MS = 1000;
/**
 * Advanced rate limiter with adaptive throttling.
 *
 * Tracks request timestamps and their token counts over a 60 second sliding
 * window, plus a burst counter that is reset once the burst window (1 second)
 * has elapsed. When `config.adaptiveThrottling` is enabled, three or more
 * consecutive errors shrink both limits (down to 30% of the configured
 * values); every successful request grows them back towards 100%.
 */
export class AdaptiveRateLimiter {
    /**
     * @param {object} config
     * @param {number} config.requestsPerMinute - Request budget per window.
     * @param {number} config.tokensPerMinute - Token budget per window.
     * @param {number} config.burstLimit - Max requests per burst window.
     * @param {boolean} config.adaptiveThrottling - Shrink limits on repeated errors.
     */
    constructor(config) {
        this.config = config;
        // Parallel arrays: requestWindow[i] is the timestamp of the request
        // that consumed tokenWindow[i] tokens.
        this.requestWindow = [];
        this.tokenWindow = [];
        this.burstRequests = 0;
        this.lastResetTime = Date.now();
        this.adaptiveMultiplier = 1.0;
        this.consecutiveErrors = 0;
    }
    /**
     * Check if a request can be made based on the request, token and burst limits.
     *
     * @param {number} [estimatedTokens=0] - Tokens the request is expected to use.
     * @returns {Promise<boolean>} Resolves to true when all limits allow the request.
     */
    async canMakeRequest(estimatedTokens = 0) {
        const now = Date.now();
        this.cleanupOldEntries(now);
        // The burst counter must also expire here; otherwise a limiter that hit
        // its burst limit would refuse every request forever, because the
        // counter would only be reset by a request that is never allowed.
        this.resetBurstWindowIfElapsed(now);
        if (this.requestWindow.length >= this.getAdjustedRequestLimit()) {
            return false;
        }
        if (this.sumWindowTokens() + estimatedTokens > this.getAdjustedTokenLimit()) {
            return false;
        }
        return this.burstRequests < this.config.burstLimit;
    }
    /**
     * Record a successful request.
     *
     * @param {number} tokensUsed - Tokens attributed to the request.
     */
    recordRequest(tokensUsed) {
        const now = Date.now();
        // Expire the previous burst window first so this request is counted in
        // the window it actually belongs to.
        this.resetBurstWindowIfElapsed(now);
        this.requestWindow.push(now);
        this.tokenWindow.push(tokensUsed);
        this.burstRequests++;
        this.consecutiveErrors = 0;
        // Recover a little on every success until the limits are fully restored.
        if (this.adaptiveMultiplier < 1.0) {
            this.adaptiveMultiplier = Math.min(this.adaptiveMultiplier * 1.1, 1.0);
        }
    }
    /**
     * Record a failed request (adapts rate limiting).
     *
     * @param {unknown} error - The failure; currently not inspected.
     */
    recordError(error) {
        this.consecutiveErrors++;
        if (this.config.adaptiveThrottling && this.consecutiveErrors >= 3) {
            // Reduce the limits by 20% per additional error, never below 30%.
            this.adaptiveMultiplier = Math.max(this.adaptiveMultiplier * 0.8, 0.3);
        }
    }
    /**
     * Get the estimated wait time until the request-count limit frees a slot.
     * Token and burst limits are not taken into account.
     *
     * @returns {number} Milliseconds to wait; 0 when a slot is available.
     */
    getEstimatedWaitTime() {
        const now = Date.now();
        this.cleanupOldEntries(now);
        return this.computeWaitTime(now);
    }
    /**
     * Drop requests (and their token counts) that left the sliding window.
     *
     * @param {number} now - Current time in epoch milliseconds.
     */
    cleanupOldEntries(now) {
        const windowStart = now - RATE_LIMIT_WINDOW_MS;
        const liveRequests = [];
        const liveTokens = [];
        // Filter both arrays by the same predicate so they stay aligned, even
        // when every entry has expired.
        this.requestWindow.forEach((time, index) => {
            if (time > windowStart) {
                liveRequests.push(time);
                liveTokens.push(this.tokenWindow[index]);
            }
        });
        this.requestWindow = liveRequests;
        this.tokenWindow = liveTokens;
    }
    /** @returns {number} Request limit after applying the adaptive multiplier. */
    getAdjustedRequestLimit() {
        return Math.floor(this.config.requestsPerMinute * this.adaptiveMultiplier);
    }
    /** @returns {number} Token limit after applying the adaptive multiplier. */
    getAdjustedTokenLimit() {
        return Math.floor(this.config.tokensPerMinute * this.adaptiveMultiplier);
    }
    /**
     * Snapshot of the limiter state for the current window.
     *
     * @returns {{currentRequests: number, currentTokens: number,
     *   adaptiveMultiplier: number, consecutiveErrors: number,
     *   estimatedWaitTime: number}}
     */
    getMetrics() {
        const now = Date.now();
        this.cleanupOldEntries(now);
        return {
            currentRequests: this.requestWindow.length,
            currentTokens: this.sumWindowTokens(),
            adaptiveMultiplier: this.adaptiveMultiplier,
            consecutiveErrors: this.consecutiveErrors,
            estimatedWaitTime: this.computeWaitTime(now)
        };
    }
    /** Reset the burst counter once the burst window has elapsed. */
    resetBurstWindowIfElapsed(now) {
        if (now - this.lastResetTime > BURST_WINDOW_MS) {
            this.burstRequests = 0;
            this.lastResetTime = now;
        }
    }
    /** Total tokens recorded in the (already cleaned) window. */
    sumWindowTokens() {
        return this.tokenWindow.reduce((sum, tokens) => sum + tokens, 0);
    }
    /** Wait time for the request-count limit; expects a cleaned window. */
    computeWaitTime(now) {
        if (this.requestWindow.length < this.getAdjustedRequestLimit()) {
            return 0;
        }
        // A slot frees up when the oldest request in the window expires.
        // reduce() avoids spreading a potentially large array into Math.min.
        const oldestRequest = this.requestWindow.reduce((oldest, time) => Math.min(oldest, time), Infinity);
        return Math.max(RATE_LIMIT_WINDOW_MS - (now - oldestRequest), 0);
    }
}
/**
 * Request queue with priority handling and batching.
 *
 * Holds one FIFO queue per numeric `RequestPriority` value and always serves
 * the highest non-empty priority first. Also keeps simple metrics: current
 * size, average wait time (last 100 dequeues), processed/failed counters and
 * throughput over the last 60 seconds.
 */
export class PriorityRequestQueue {
    constructor() {
        this.queues = new Map();
        this.processing = false;
        this.metrics = {
            queueSize: 0,
            averageWaitTime: 0,
            processedRequests: 0,
            failedRequests: 0,
            throughput: 0
        };
        this.waitTimes = [];
        // Completion timestamps inside the throughput window, oldest first.
        this.completionTimes = [];
        // Initialize one queue per numeric priority; the enum object also holds
        // reverse (number -> name) entries whose values are strings.
        Object.values(RequestPriority).forEach(priority => {
            if (typeof priority === 'number') {
                this.queues.set(priority, []);
            }
        });
    }
    /**
     * Add a request to the queue matching its priority.
     * Stamps `request.queuedAt` with the current time (mutates the request).
     *
     * @param {{priority: number}} request
     * @throws {Error} When `request.priority` is not a known priority.
     */
    enqueue(request) {
        const queue = this.queues.get(request.priority);
        if (!queue) {
            throw new Error(`Invalid priority: ${request.priority}`);
        }
        // Add timestamp for wait time calculation
        request.queuedAt = Date.now();
        queue.push(request);
        this.updateQueueSize();
    }
    /**
     * Remove and return the next request, highest priority first.
     *
     * @returns {object|null} The request, or null when all queues are empty.
     */
    dequeue() {
        for (let priority = RequestPriority.CRITICAL; priority >= RequestPriority.LOW; priority--) {
            const queue = this.queues.get(priority);
            if (queue && queue.length > 0) {
                const request = queue.shift();
                this.recordWaitTime(Date.now() - request.queuedAt);
                this.updateQueueSize();
                return request;
            }
        }
        return null;
    }
    /**
     * Dequeue up to `maxBatchSize` requests in priority order.
     *
     * @param {number} [maxBatchSize=5]
     * @returns {object[]} Possibly empty list of requests.
     */
    dequeueBatch(maxBatchSize = 5) {
        const batch = [];
        while (batch.length < maxBatchSize) {
            const request = this.dequeue();
            if (!request)
                break;
            batch.push(request);
        }
        return batch;
    }
    /**
     * Get the queue size for one priority, or the total when omitted.
     *
     * @param {number} [priority]
     * @returns {number}
     */
    getQueueSize(priority) {
        if (priority !== undefined) {
            return this.queues.get(priority)?.length || 0;
        }
        return Array.from(this.queues.values())
            .reduce((total, queue) => total + queue.length, 0);
    }
    /**
     * Clear all queues. Queued requests are dropped without their callbacks
     * being invoked.
     */
    clear() {
        this.queues.forEach(queue => queue.splice(0));
        this.updateQueueSize();
    }
    /**
     * Get a copy of the queue metrics, with throughput refreshed for the
     * current time.
     */
    getMetrics() {
        this.refreshThroughput(Date.now());
        return { ...this.metrics };
    }
    updateQueueSize() {
        this.metrics.queueSize = this.getQueueSize();
    }
    recordWaitTime(waitTime) {
        this.waitTimes.push(waitTime);
        // Keep only recent wait times (last 100)
        if (this.waitTimes.length > 100) {
            this.waitTimes.shift();
        }
        this.metrics.averageWaitTime =
            this.waitTimes.reduce((sum, time) => sum + time, 0) / this.waitTimes.length;
    }
    /**
     * Count a finished request and update throughput.
     *
     * @param {boolean} success - Whether the request succeeded.
     */
    recordProcessedRequest(success) {
        if (success) {
            this.metrics.processedRequests++;
        }
        else {
            this.metrics.failedRequests++;
        }
        const now = Date.now();
        this.completionTimes.push(now);
        this.refreshThroughput(now);
    }
    /**
     * Recompute throughput as requests per second over the last 60 seconds.
     * Dividing the lifetime total by 60 would grow without bound.
     */
    refreshThroughput(now) {
        const windowStart = now - 60000;
        while (this.completionTimes.length > 0 && this.completionTimes[0] <= windowStart) {
            this.completionTimes.shift();
        }
        this.metrics.throughput = this.completionTimes.length / 60;
    }
}
/**
 * Optimized Claude client with advanced features.
 *
 * Requests are placed on a priority queue and drained every 100ms by a
 * background processor, which checks the adaptive rate limiter, borrows an
 * Anthropic client from a resource pool and reports the outcome through the
 * request's callback.
 */
export class OptimizedClaudeClient {
    /**
     * @param {object} [rateLimitConfig] - Passed to `AdaptiveRateLimiter`.
     * @param {object} [connectionConfig] - Pool size, timeouts and retry settings.
     */
    constructor(rateLimitConfig = {
        requestsPerMinute: 50,
        tokensPerMinute: 100000,
        burstLimit: 10,
        queueSize: 1000,
        adaptiveThrottling: true
    }, connectionConfig = {
        maxConcurrentRequests: 5,
        connectionTimeout: 30000,
        requestTimeout: 120000,
        keepAliveTimeout: 60000,
        retryConfig: {
            maxRetries: 3,
            backoffMultiplier: 2,
            maxBackoffDelay: 30000
        }
    }) {
        this.rateLimitConfig = rateLimitConfig;
        this.connectionConfig = connectionConfig;
        this.logger = structuredLogger;
        this.tokenUsageMetrics = {
            totalTokensUsed: 0,
            requestCount: 0,
            averageTokensPerRequest: 0,
            costEstimate: 0,
            quotaUsagePercentage: 0
        };
        this.isProcessing = false;
        this.client = new Anthropic({
            apiKey: config.claude.apiKey,
            timeout: connectionConfig.requestTimeout
        });
        this.rateLimiter = new AdaptiveRateLimiter(rateLimitConfig);
        this.requestQueue = new PriorityRequestQueue();
        this.errorRecovery = new ErrorRecoverySystem();
        // Create connection pool
        this.connectionPool = new ResourcePool(() => this.createConnection(), (conn) => this.destroyConnection(conn), connectionConfig.maxConcurrentRequests, connectionConfig.connectionTimeout);
        this.startRequestProcessor();
    }
    /**
     * Queue a chat request and resolve with the text of the reply.
     *
     * @param {Array<object>} messages - Conversation messages.
     * @param {object} [options] - systemPrompt, maxTokens, temperature, model,
     *   priority (defaults to NORMAL) and timeout (ms, forwarded to the API call).
     * @returns {Promise<string>} Reply text; rejects with an Error on failure.
     */
    async chat(messages, options = {}) {
        return new Promise((resolve, reject) => {
            const request = {
                id: this.generateRequestId(),
                method: 'chat',
                params: {
                    messages,
                    systemPrompt: options.systemPrompt,
                    maxTokens: options.maxTokens || config.claude.maxTokens,
                    // `??` rather than `||`: a temperature of 0 is a valid setting.
                    temperature: options.temperature ?? config.claude.temperature,
                    model: options.model || config.claude.model
                },
                priority: options.priority || RequestPriority.NORMAL,
                timeout: options.timeout,
                callback: (result) => {
                    // Decide on `success` alone: an empty-string reply is still a reply.
                    if (result.success) {
                        resolve(result.result);
                    }
                    else {
                        reject(new Error(result.error || 'Unknown error'));
                    }
                }
            };
            this.requestQueue.enqueue(request);
        });
    }
    /**
     * Queue several prepared requests and resolve once all have reported.
     * Each request's own callback (if any) is still invoked.
     *
     * @param {Array<object>} requests - Request objects accepted by the queue.
     * @returns {Promise<Array<object>>} Results in completion order.
     */
    async batchRequests(requests) {
        // Nothing will ever call back for an empty batch, so resolve right away.
        if (requests.length === 0) {
            return [];
        }
        return new Promise((resolve) => {
            const settled = new Set();
            const batchResults = [];
            // Wrap callbacks before enqueueing so no result can be missed.
            requests.forEach(request => {
                const originalCallback = request.callback;
                request.callback = (result) => {
                    if (!settled.has(request)) {
                        settled.add(request);
                        batchResults.push(result);
                    }
                    try {
                        if (originalCallback) {
                            originalCallback(result);
                        }
                    }
                    finally {
                        // Check completion even if the caller's callback threw.
                        if (settled.size === requests.length) {
                            resolve(batchResults);
                        }
                    }
                };
            });
            requests.forEach(request => this.requestQueue.enqueue(request));
        });
    }
    /**
     * Run an expert consultation through the request queue.
     *
     * Token usage, rate-limiter bookkeeping and error counts are recorded by
     * the queue processor, so they are not recorded again here. When error
     * recovery asks for a retry, the consultation is retried at most
     * `connectionConfig.retryConfig.maxRetries` times (3 when unset).
     *
     * @param {string} expertPrompt - System prompt describing the expert.
     * @param {string} userMessage - The user's message.
     * @param {Array<object>} [conversationHistory=[]] - Earlier messages.
     * @param {object} [options] - expertType, workflowId, priority (defaults to HIGH).
     * @returns {Promise<string>} The expert's reply text.
     */
    async consultExpert(expertPrompt, userMessage, conversationHistory = [], options = {}) {
        const maxRetries = this.connectionConfig.retryConfig?.maxRetries ?? 3;
        const timerName = `expert_consultation_${options.expertType || 'unknown'}`;
        const workflowId = options.workflowId || 'system';
        for (let attempt = 0;; attempt++) {
            const correlationId = this.generateCorrelationId();
            try {
                PerformanceMonitor.startTimer(timerName);
                const messages = [
                    ...conversationHistory,
                    { role: 'user', content: userMessage }
                ];
                const estimatedTokens = this.estimateTokenUsage(expertPrompt, messages);
                // Informational only: the queue processor enforces the limit.
                const canProceed = await this.rateLimiter.canMakeRequest(estimatedTokens);
                if (!canProceed) {
                    this.logger.logWorkflow('warn', 'Rate limit reached, request queued', workflowId, {
                        estimatedWaitTime: this.rateLimiter.getEstimatedWaitTime(),
                        correlationId
                    });
                }
                const result = await this.chat(messages, {
                    systemPrompt: expertPrompt,
                    priority: options.priority || RequestPriority.HIGH,
                    timeout: this.connectionConfig.requestTimeout
                });
                const duration = PerformanceMonitor.endTimer(timerName);
                this.logger.logWorkflow('debug', 'Expert consultation completed', workflowId, {
                    expertType: options.expertType,
                    duration,
                    tokensUsed: estimatedTokens,
                    correlationId
                });
                return result;
            }
            catch (error) {
                PerformanceMonitor.endTimer(timerName);
                const context = {
                    operation: 'expert_consultation',
                    expertType: options.expertType,
                    workflowId: options.workflowId,
                    correlationId,
                    timestamp: Date.now(),
                    metadata: { userMessage: String(userMessage).substring(0, 100) }
                };
                const recoveryResult = await this.errorRecovery.handleError(error, context);
                const shouldRetry = recoveryResult.success &&
                    recoveryResult.action === 'retry' &&
                    attempt < maxRetries;
                if (!shouldRetry) {
                    throw error;
                }
            }
        }
    }
    /**
     * Get comprehensive API metrics
     *
     * @returns {{tokenUsage: object, queueMetrics: object,
     *   rateLimiterMetrics: object, connectionPoolStats: object}}
     */
    getMetrics() {
        return {
            tokenUsage: { ...this.tokenUsageMetrics },
            queueMetrics: this.requestQueue.getMetrics(),
            rateLimiterMetrics: this.rateLimiter.getMetrics(),
            connectionPoolStats: this.connectionPool.getStats()
        };
    }
    /**
     * Inspect current metrics and log when wait times are high or the pool
     * has waiters. No limits or pool sizes are changed yet.
     */
    async optimizePerformance() {
        const metrics = this.getMetrics();
        if (metrics.queueMetrics.averageWaitTime > 10000) { // 10 seconds
            this.logger.logWorkflow('info', 'High wait times detected, adjusting rate limits', 'system', {
                averageWaitTime: metrics.queueMetrics.averageWaitTime
            });
            // TODO: implement dynamic rate limit adjustment
        }
        const poolStats = metrics.connectionPoolStats;
        if (poolStats.waiting > 0 && poolStats.total < 10) {
            this.logger.logWorkflow('info', 'Expanding connection pool', 'system', {
                currentSize: poolStats.total,
                waitingRequests: poolStats.waiting
            });
        }
    }
    /**
     * Graceful shutdown.
     *
     * Keeps the processor running until the queue is empty or the grace
     * period ends, then stops it, fails any request still queued (so callers
     * are not left waiting forever) and destroys the connection pool.
     *
     * @param {number} [gracePeriodMs=5000] - Max time to wait for the queue to drain.
     */
    async shutdown(gracePeriodMs = 5000) {
        const remainingRequests = this.requestQueue.getQueueSize();
        if (remainingRequests > 0) {
            this.logger.logWorkflow('info', `Processing ${remainingRequests} remaining requests before shutdown`, 'system');
            // The processor must still be running here, otherwise waiting
            // would not process anything.
            const deadline = Date.now() + gracePeriodMs;
            while (this.requestQueue.getQueueSize() > 0 && Date.now() < deadline) {
                await new Promise(resolve => setTimeout(resolve, 100));
            }
        }
        this.isProcessing = false;
        if (this.processingInterval) {
            clearInterval(this.processingInterval);
        }
        let abandoned = this.requestQueue.dequeue();
        while (abandoned) {
            this.notifyCaller(abandoned, {
                id: abandoned.id,
                success: false,
                error: 'Client shut down before the request was processed',
                duration: 0
            });
            abandoned = this.requestQueue.dequeue();
        }
        await this.connectionPool.destroy();
        this.logger.logWorkflow('info', 'Claude client shutdown completed', 'system');
    }
    // Private methods
    /** Start the background loop that drains the queue every 100ms. */
    startRequestProcessor() {
        this.isProcessing = true;
        this.processingInterval = setInterval(() => {
            if (!this.isProcessing)
                return;
            // Never let a failure here become an unhandled rejection.
            this.processQueuedRequests().catch((error) => {
                this.logger.logError(error, 'Request queue processing failed', {});
            });
        }, 100); // Process every 100ms
    }
    /** Dequeue up to five requests and process them in parallel. */
    async processQueuedRequests() {
        const batch = this.requestQueue.dequeueBatch(5);
        if (batch.length === 0)
            return;
        await Promise.allSettled(batch.map(request => this.processRequest(request)));
    }
    /**
     * Process one queued request: rate-limit check, API call, bookkeeping,
     * then exactly one callback invocation with the outcome. A request that
     * is rate limited is put back on the queue without invoking its callback.
     *
     * @param {object} request - Queued request (id, method, params, timeout, callback).
     */
    async processRequest(request) {
        const startTime = Date.now();
        let outcome;
        try {
            const estimatedTokens = this.estimateTokenUsageFromRequest(request);
            const canProceed = await this.rateLimiter.canMakeRequest(estimatedTokens);
            if (!canProceed) {
                // Requeue request
                this.requestQueue.enqueue(request);
                return;
            }
            if (request.method !== 'chat') {
                throw new Error(`Unsupported method: ${request.method}`);
            }
            const connection = await this.connectionPool.acquire();
            let result;
            let duration;
            try {
                result = await this.executeChat(connection, request.params, request.timeout);
                duration = Date.now() - startTime;
            }
            finally {
                await this.connectionPool.release(connection);
            }
            this.rateLimiter.recordRequest(estimatedTokens);
            this.updateTokenUsageMetrics(estimatedTokens, duration);
            this.requestQueue.recordProcessedRequest(true);
            outcome = {
                id: request.id,
                success: true,
                result,
                duration,
                tokensUsed: estimatedTokens
            };
        }
        catch (error) {
            const duration = Date.now() - startTime;
            this.rateLimiter.recordError(error);
            this.requestQueue.recordProcessedRequest(false);
            this.logger.logError(error, 'Request processing failed', {
                requestId: request.id,
                method: request.method
            });
            outcome = {
                id: request.id,
                success: false,
                error: error instanceof Error ? error.message : String(error),
                duration
            };
        }
        // Invoked outside the try/catch above so that a throwing callback is
        // not mistaken for a failed request and called a second time.
        this.notifyCaller(request, outcome);
    }
    /** Invoke the request's callback, logging (not propagating) anything it throws. */
    notifyCaller(request, outcome) {
        if (!request.callback) {
            return;
        }
        try {
            request.callback(outcome);
        }
        catch (callbackError) {
            this.logger.logError(callbackError, 'Request callback failed', {
                requestId: request.id,
                method: request.method
            });
        }
    }
    /**
     * Send one chat request through the given client and return the reply text.
     *
     * @param {object} connection - Client exposing `messages.create`.
     * @param {object} params - model, maxTokens, temperature, systemPrompt, messages.
     * @param {number} [timeout] - Per-request timeout in ms; client default when omitted.
     * @returns {Promise<string>} Text of the first content block.
     * @throws {Error} When the first content block is missing or not text.
     */
    async executeChat(connection, params, timeout) {
        const requestOptions = timeout ? { timeout } : undefined;
        const response = await connection.messages.create({
            model: params.model || config.claude.model,
            max_tokens: params.maxTokens || config.claude.maxTokens,
            // `??` keeps an explicit temperature of 0.
            temperature: params.temperature ?? config.claude.temperature,
            system: params.systemPrompt,
            messages: params.messages
        }, requestOptions);
        const content = response.content?.[0];
        if (content?.type === 'text') {
            return content.text;
        }
        throw new Error('Unexpected response type from Claude API');
    }
    /** Factory used by the connection pool. */
    async createConnection() {
        return new Anthropic({
            apiKey: config.claude.apiKey,
            timeout: this.connectionConfig.connectionTimeout
        });
    }
    /** Destructor used by the connection pool. */
    async destroyConnection(connection) {
        // Cleanup connection resources if needed
        // Anthropic client doesn't require explicit cleanup
    }
    /**
     * Rough token estimate (4 characters ≈ 1 token) for a prompt plus messages.
     * Counts string contents and the `text` of blocks in array contents.
     *
     * @param {string} systemPrompt
     * @param {Array<object>} messages
     * @returns {number}
     */
    estimateTokenUsage(systemPrompt, messages) {
        let totalChars = (systemPrompt || '').length;
        messages.forEach(msg => {
            if (typeof msg.content === 'string') {
                totalChars += msg.content.length;
            }
            else if (Array.isArray(msg.content)) {
                msg.content.forEach(block => {
                    if (block && typeof block.text === 'string') {
                        totalChars += block.text.length;
                    }
                });
            }
        });
        return Math.ceil(totalChars / 4);
    }
    /** Token estimate for a queued request; 1000 for non-chat methods. */
    estimateTokenUsageFromRequest(request) {
        if (request.method === 'chat') {
            const params = request.params;
            return this.estimateTokenUsage(params.systemPrompt || '', params.messages);
        }
        return 1000; // Default estimation
    }
    /**
     * Add a request's tokens to the running totals.
     *
     * @param {number} tokensUsed
     * @param {number} duration - Currently unused.
     */
    updateTokenUsageMetrics(tokensUsed, duration) {
        this.tokenUsageMetrics.totalTokensUsed += tokensUsed;
        this.tokenUsageMetrics.requestCount++;
        this.tokenUsageMetrics.averageTokensPerRequest =
            this.tokenUsageMetrics.totalTokensUsed / this.tokenUsageMetrics.requestCount;
        // Rough cost estimation at a flat, approximate cost per token.
        this.tokenUsageMetrics.costEstimate =
            this.tokenUsageMetrics.totalTokensUsed * 0.000008;
    }
    generateRequestId() {
        return `req_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
    }
    generateCorrelationId() {
        return `corr_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
    }
}
// Shared client instance, built with the default rate-limit and connection
// settings. Constructing it starts the background request processor
// (a 100ms interval), so importing this module has that side effect;
// call `shutdown()` on the instance to stop the interval.
export const optimizedClaudeClient = new OptimizedClaudeClient();
//# sourceMappingURL=optimizedClient.js.map