UNPKG

dfa-mcp-server

Version:

DFA-based workflow MCP server for guiding LLM task completion

928 lines (912 loc) 41 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); Object.defineProperty(exports, "__esModule", { value: true }); exports.JudgeEngine = void 0; const fs = __importStar(require("fs/promises")); const path = __importStar(require("path")); const error_formatter_js_1 = require("./error-formatter.js"); class JudgeEngine { decisionHistory = new Map(); llmBaseUrl; llmApiKey; llmModel; llmThinkingMode; historyDir; // Retry configuration MAX_RETRIES = 3; RETRY_DELAY = 1000; // 1 second initial delay RETRY_BACKOFF = 2; // Exponential backoff multiplier // History configuration MEMORY_CACHE_SIZE = 20; // Keep last N entries in memory per workflow constructor(workflowDir = '.workflows') { // Load LLM configuration from environment this.llmBaseUrl = process.env.LLM_BASE_URL; this.llmApiKey = process.env.LLM_API_KEY; this.llmModel = process.env.LLM_JUDGE_MODEL || 'gemini-2.5-pro'; this.llmThinkingMode = process.env.LLM_JUDGE_THINKING_MODE || 'high'; // Set up history directory this.historyDir = path.join(workflowDir, 'history'); } /** * Initialize the judge engine and create history directory */ async initialize() { try { await fs.mkdir(this.historyDir, { recursive: true }); } catch (error) { console.error('Failed to create history directory:', error); } } /** * Validate a transition attempt using the workflow's judge configuration */ async validateTransition(attempt) { const { definition } = attempt; // If judge is not enabled, approve everything if (!definition.judgeConfig?.enabled) { return { approved: true, confidence: 1.0, reasoning: 'Judge not enabled for this workflow' }; } const config = definition.judgeConfig; // Perform sanity check first const sanityCheck = await this.performSanityCheck(attempt); if (!sanityCheck.safe) { console.warn('Judge Sanity Check failed with warnings:', sanityCheck.warnings); // Add warnings to the decision metadata if (config.strictMode) { // In strict mode, reject transitions that fail sanity check return { approved: false, confidence: 0, reasoning: 'Transition rejected due to potential validation bypass attempt', violations: sanityCheck.warnings, suggestions: ['Remove suspicious patterns from validators and data'], metadata: { sanityCheckFailed: true, warnings: sanityCheck.warnings } }; } } // Use LLM judge if configured and available if (config.useLLM && this.llmBaseUrl && this.llmApiKey) { console.log('LLM Judge: Attempting to use LLM validation'); console.log(`LLM Config: URL=${this.llmBaseUrl}, Model=${this.llmModel}`); try { const decision = await this.callLLMJudge(attempt); console.log('LLM Judge: Successfully received decision'); // Add sanity check warnings to decision if any if (sanityCheck.warnings.length > 0) { decision.metadata = { ...decision.metadata, sanityCheckWarnings: sanityCheck.warnings }; } return decision; } catch (error) { console.error('LLM judge failed, falling back to structural validation:', error); // Fall through to structural validation } } else { console.log(`LLM Judge: Not using LLM (useLLM=${config.useLLM}, hasUrl=${!!this.llmBaseUrl}, hasKey=${!!this.llmApiKey})`); } const violations = []; const suggestions = []; let totalConfidence = 0; let validationCount = 0; // 1. Validate structural correctness (40% weight) const structuralResult = this.validateStructure(attempt); totalConfidence += structuralResult.confidence * 0.4; validationCount += 0.4; if (!structuralResult.valid) { violations.push(`Structural: ${structuralResult.reason}`); } // 2. Validate exit conditions of current state (15% weight) const exitResult = this.validateStateExit(attempt); if (exitResult) { totalConfidence += exitResult.confidence * 0.15; validationCount += 0.15; if (!exitResult.valid) { violations.push(`Exit condition: ${exitResult.reason}`); suggestions.push('Ensure all exit conditions are met before transitioning'); } } // 3. Validate entry conditions of next state (15% weight) const entryResult = this.validateStateEntry(attempt); if (entryResult) { totalConfidence += entryResult.confidence * 0.15; validationCount += 0.15; if (!entryResult.valid) { violations.push(`Entry condition: ${entryResult.reason}`); suggestions.push('Check that the target state prerequisites are satisfied'); } } // 4. Validate transition-specific rules (20% weight) const transitionResult = this.validateTransitionRules(attempt); if (transitionResult) { totalConfidence += transitionResult.confidence * 0.2; validationCount += 0.2; if (!transitionResult.valid) { violations.push(`Transition rule: ${transitionResult.reason}`); } } // 5. Validate data completeness (10% weight) const dataResult = this.validateDataCompleteness(attempt); totalConfidence += dataResult.confidence * 0.1; validationCount += 0.1; if (!dataResult.valid) { violations.push(`Data: ${dataResult.reason}`); suggestions.push('Provide all required data fields for this action'); } // 6. Run custom validation rules if (config.validationRules) { for (const rule of config.validationRules) { const result = rule.validate(attempt); if (!result.valid) { violations.push(`${rule.name}: ${result.reason}`); } } } // 7. Run custom validator if provided if (config.customValidator) { return config.customValidator(attempt); } // Calculate final confidence // Note: totalConfidence already has weighted values, no need to divide const confidence = totalConfidence; // Determine approval based on confidence and strict mode let approved = violations.length === 0; if (config.strictMode && config.minConfidence) { approved = approved && confidence >= config.minConfidence; if (confidence < config.minConfidence) { violations.push(`Confidence ${confidence.toFixed(2)} below minimum ${config.minConfidence}`); } } // Generate reasoning const reasoning = this.generateReasoning(attempt, violations, confidence); // Generate suggestions if not approved if (!approved && suggestions.length === 0) { suggestions.push(...this.generateSuggestions(attempt, violations)); } const decision = { approved, confidence, reasoning, violations: violations.length > 0 ? violations : undefined, suggestions: suggestions.length > 0 ? suggestions : undefined, metadata: sanityCheck.warnings.length > 0 ? { sanityCheckWarnings: sanityCheck.warnings } : undefined }; // Store decision in history await this.recordDecision(attempt.workflowId, decision); return decision; } /** * Evaluate transition conditions using LLM to determine which rule matches */ async evaluateTransitionConditions(rules, context, attempt) { // If LLM is not available, fall back to simple evaluation if (!this.llmBaseUrl || !this.llmApiKey) { return this.evaluateConditionsSimple(rules, context); } const prompt = this.buildConditionEvaluationPrompt(rules, context, attempt); try { const response = await fetch(`${this.llmBaseUrl}/chat/completions`, { method: 'POST', headers: { 'x-litellm-api-key': this.llmApiKey, 'Content-Type': 'application/json', 'accept': 'application/json' }, body: JSON.stringify({ model: this.llmModel, messages: [ { role: 'system', content: 'You are a workflow condition evaluator. Analyze conditions and determine which ones are true based on the context.' }, { role: 'user', content: prompt } ], temperature: 0.2, // Low temperature for consistent evaluation max_tokens: 4000 }) }); if (!response.ok) { throw new Error(`LLM API error: ${response.status} ${response.statusText}`); } const data = await response.json(); const content = data.choices?.[0]?.message?.content; if (!content) { throw new Error('No content in LLM response'); } return this.parseConditionEvaluationResponse(content, rules); } catch (error) { console.error('LLM condition evaluation failed, falling back to simple evaluation:', error); return this.evaluateConditionsSimple(rules, context); } } /** * Build prompt for LLM condition evaluation */ buildConditionEvaluationPrompt(rules, context, attempt) { return `Evaluate these code-like conditions against the provided context. Use your reasoning to understand what each condition is checking for. Current State: ${attempt.fromState} Action: ${attempt.action} Workflow: ${attempt.definition.name} Context Data: ${JSON.stringify(context, null, 2)} Conditions to evaluate: ${rules.map((r, i) => `${i + 1}. "${r.condition}"${r.description ? ` // ${r.description}` : ''} -> target: "${r.target}"`).join('\n')} For each condition: 1. Parse what properties and values it's checking 2. Find or intelligently derive those values from the context 3. Apply the logical operators (>, <, >=, <=, ===, !==, &&, ||) 4. Determine if the condition is TRUE or FALSE 5. Provide your confidence (0.0-1.0) and reasoning Example evaluation: Condition: "context.amount > 1000 && context.priority === 'high'" - context.amount: Looking at the context, amount is 2500 - context.priority: The priority field shows 'high' - Evaluation: 2500 > 1000 && 'high' === 'high' = TRUE && TRUE = TRUE - Confidence: 1.0 (values are explicitly in context) If a property is not explicitly in the context but can be inferred, explain your reasoning. For example, if checking "context.isUrgent" but only seeing deadlineDate, you might infer urgency from how close the deadline is. RESPOND WITH JSON: { "evaluations": [ { "index": 0, "condition": "the condition string", "result": true/false, "confidence": 0.0-1.0, "reasoning": "Step by step explanation", "extractedValues": { "property.path": "extracted or inferred value" } } ], "recommendedIndex": 0 or null if none match, "overallReasoning": "Summary of the evaluation process" }`; } /** * Parse LLM response for condition evaluation */ parseConditionEvaluationResponse(content, rules) { try { const jsonMatch = content.match(/\{[\s\S]*\}/); if (!jsonMatch) { throw new Error('No JSON found in LLM response'); } const parsed = JSON.parse(jsonMatch[0]); const evaluations = parsed.evaluations.map((e) => ({ condition: e.condition || rules[e.index]?.condition || '', result: Boolean(e.result), confidence: Math.max(0, Math.min(1, Number(e.confidence) || 0.5)), reasoning: String(e.reasoning || 'No reasoning provided'), extractedValues: e.extractedValues || {} })); // Find the first true condition with high confidence let matchedRule = null; const recommendedIndex = parsed.recommendedIndex; if (recommendedIndex !== null && recommendedIndex >= 0 && recommendedIndex < rules.length) { if (evaluations[recommendedIndex]?.result) { matchedRule = rules[recommendedIndex]; } } else { // Find first matching condition for (let i = 0; i < evaluations.length; i++) { if (evaluations[i].result && evaluations[i].confidence >= 0.7) { matchedRule = rules[i]; break; } } } return { matchedRule, evaluations, overallReasoning: String(parsed.overallReasoning || 'Conditions evaluated using LLM reasoning') }; } catch (error) { // If parsing fails, return no match return { matchedRule: null, evaluations: rules.map(r => ({ condition: r.condition, result: false, confidence: 0, reasoning: `Failed to parse LLM response: ${error instanceof Error ? error.message : String(error)}`, extractedValues: {} })), overallReasoning: 'LLM response parsing failed' }; } } /** * Simple fallback evaluation for conditions (when LLM is not available) */ evaluateConditionsSimple(rules, context) { const evaluations = rules.map(rule => { // Simple evaluation: only handle "true" condition const result = rule.condition.toLowerCase() === 'true'; return { condition: rule.condition, result, confidence: result ? 1.0 : 0.0, reasoning: result ? 'Always true condition' : 'Condition evaluation requires LLM', extractedValues: {} }; }); // Find first true condition const matchedIndex = evaluations.findIndex(e => e.result); const matchedRule = matchedIndex >= 0 ? rules[matchedIndex] : null; return { matchedRule, evaluations, overallReasoning: 'Simple evaluation without LLM - only "true" conditions supported' }; } /** * Validate structural correctness of the transition */ validateStructure(attempt) { const { fromState, action, toState, definition } = attempt; const currentStatedef = definition.states[fromState]; if (!currentStatedef) { return { valid: false, confidence: 0, reason: `Current state '${fromState}' not found in workflow definition` }; } if (currentStatedef.final) { return { valid: false, confidence: 0, reason: `Cannot transition from final state '${fromState}'` }; } const transitionRules = currentStatedef.transitions?.[action]; if (!transitionRules || transitionRules.length === 0) { return { valid: false, confidence: 0, reason: `Action '${action}' not valid for state '${fromState}'` }; } // Check if the target state is valid in any of the rules const validTargets = transitionRules.map(rule => rule.target); if (!validTargets.includes(toState)) { return { valid: false, confidence: 0, reason: `State '${toState}' is not a valid target for action '${action}' from state '${fromState}'. Valid targets: ${validTargets.join(', ')}` }; } return { valid: true, confidence: 1.0 }; } /** * Validate exit conditions of current state */ validateStateExit(attempt) { const { fromState, context, definition } = attempt; const validator = definition.stateValidators?.[fromState]; if (!validator?.exitConditions) { return null; } return validator.exitConditions(context); } /** * Validate entry conditions of target state */ validateStateEntry(attempt) { const { toState, context, definition } = attempt; const validator = definition.stateValidators?.[toState]; if (!validator?.entryConditions) { return null; } // Create projected context after transition const projectedContext = { ...context, ...attempt.data }; return validator.entryConditions(projectedContext); } /** * Validate transition-specific rules */ validateTransitionRules(attempt) { const { action, data, context, definition } = attempt; const validator = definition.transitionValidators?.[action]; if (!validator) { return null; } return validator(data, context); } /** * Validate data completeness and required fields */ validateDataCompleteness(attempt) { const { toState, context, data, definition } = attempt; const validator = definition.stateValidators?.[toState]; if (!validator?.requiredFields || validator.requiredFields.length === 0) { return { valid: true, confidence: 1.0 }; } // Check if required fields will be present after transition const projectedContext = { ...context, ...data }; const missingFields = validator.requiredFields.filter(field => !this.hasNestedProperty(projectedContext, field)); if (missingFields.length > 0) { return { valid: false, confidence: 0.5, reason: `Missing required fields for state '${toState}': ${missingFields.join(', ')}` }; } return { valid: true, confidence: 1.0 }; } /** * Check if an object has a nested property (supports dot notation) */ hasNestedProperty(obj, path) { const parts = path.split('.'); let current = obj; for (const part of parts) { if (current == null || typeof current !== 'object' || !(part in current)) { return false; } current = current[part]; } return current !== undefined; } /** * Generate human-readable reasoning for the decision */ generateReasoning(attempt, violations, confidence) { const { fromState, action, toState, definition } = attempt; if (violations.length === 0) { return `${error_formatter_js_1.ErrorFormatter.formatTransitionPath(fromState, action, toState, true)} validated successfully with ${(confidence * 100).toFixed(0)}% confidence`; } // Check if it's an invalid action error const invalidActionViolation = violations.find(v => v.includes("not valid for state")); if (invalidActionViolation) { const validActions = Object.keys(definition.states[fromState]?.transitions || {}); return error_formatter_js_1.ErrorFormatter.formatInvalidActionError(fromState, action, validActions, definition.name); } return `${error_formatter_js_1.ErrorFormatter.formatTransitionPath(fromState, action, toState, false)} rejected:\n${violations.map((v, i) => ` ${i + 1}. ${v}`).join('\n')}`; } /** * Generate helpful suggestions based on violations */ generateSuggestions(attempt, violations) { const suggestions = []; const { fromState, action, toState, data, context, definition } = attempt; for (const violation of violations) { if (violation.includes('Missing required fields')) { // Extract missing fields from violation message const match = violation.match(/Missing required fields[^:]*: (.+)/); if (match) { const missingFields = match[1].split(',').map(f => f.trim()); const errorMsg = error_formatter_js_1.ErrorFormatter.formatMissingFieldsError(missingFields, context, toState, action); suggestions.push(errorMsg); } else { suggestions.push('Include all required fields in the transition data'); } } else if (violation.includes('not valid for state')) { const validActions = Object.keys(definition.states[fromState]?.transitions || {}); if (validActions.length > 0) { suggestions.push(`Valid actions for state '${fromState}':\n${validActions.map(a => ` • ${a}`).join('\n')}`); // Add example usage suggestions.push(`Example: workflow.advance({ id: "...", action: "${validActions[0]}", data: { ... } })`); } else { suggestions.push(`State '${fromState}' has no available transitions (might be a final state)`); } } else if (violation.includes('Exit condition')) { suggestions.push(`Exit condition failed for state '${fromState}':\n - Review the current context to ensure all exit requirements are met\n - Check if any required processing is complete before transitioning`); // Show current context summary const contextKeys = Object.keys(context); if (contextKeys.length > 0) { suggestions.push(`Current context has: ${contextKeys.slice(0, 5).join(', ')}${contextKeys.length > 5 ? ' ...' : ''}`); } } else if (violation.includes('Entry condition')) { suggestions.push(`Entry condition failed for state '${toState}':\n - Ensure all prerequisites are satisfied\n - Check if required data is included in the transition`); // Show what data was provided if (data && Object.keys(data).length > 0) { suggestions.push(`Provided data: ${Object.keys(data).join(', ')}`); } else { suggestions.push('No data was provided with this transition'); } } else if (violation.includes('Confidence') && violation.includes('below minimum')) { const match = violation.match(/Confidence ([\d.]+) below minimum ([\d.]+)/); if (match) { suggestions.push(`Confidence too low (${match[1]} < ${match[2]}):\n - Provide more complete data\n - Ensure the transition makes logical sense\n - Check for any validation warnings`); } } } // If no specific suggestions were generated, provide general guidance if (suggestions.length === 0 && violations.length > 0) { suggestions.push('Review the workflow definition and ensure your transition meets all requirements'); suggestions.push(`Current state '${fromState}' expects specific conditions to transition to '${toState}'`); } return suggestions; } /** * Record decision in history */ async recordDecision(workflowId, decision) { const timestampedDecision = { ...decision, metadata: { ...decision.metadata, timestamp: new Date().toISOString() } }; // Keep in-memory cache for quick access const memoryHistory = this.decisionHistory.get(workflowId) || []; memoryHistory.push(timestampedDecision); // Keep only the last N entries in memory if (memoryHistory.length > this.MEMORY_CACHE_SIZE) { memoryHistory.splice(0, memoryHistory.length - this.MEMORY_CACHE_SIZE); } this.decisionHistory.set(workflowId, memoryHistory); // Append to file try { const historyFile = path.join(this.historyDir, `${workflowId}-judge.log`); const logLine = JSON.stringify(timestampedDecision) + '\n'; await fs.appendFile(historyFile, logLine); } catch (error) { console.error(`Failed to write judge decision to file for workflow ${workflowId}:`, error); } } /** * Get decision history for a workflow with pagination */ async getDecisionHistory(workflowId, limit = 20, offset = 0) { // First check memory cache for recent entries const memoryHistory = this.decisionHistory.get(workflowId) || []; try { const historyFile = path.join(this.historyDir, `${workflowId}-judge.log`); // Check if file exists try { await fs.access(historyFile); } catch { // File doesn't exist, return empty return { decisions: [], total: 0, hasMore: false }; } // Read file line by line const fileContent = await fs.readFile(historyFile, 'utf-8'); const lines = fileContent.trim().split('\n').filter(line => line); const totalCount = lines.length; // Apply pagination const startIndex = Math.max(0, totalCount - offset - limit); const endIndex = totalCount - offset; const selectedLines = lines.slice(startIndex, endIndex).reverse(); // Most recent first const decisions = []; for (const line of selectedLines) { try { decisions.push(JSON.parse(line)); } catch (error) { console.warn(`Failed to parse judge history line: ${line}`); } } return { decisions, total: totalCount, hasMore: startIndex > 0 }; } catch (error) { console.error(`Failed to read judge history for workflow ${workflowId}:`, error); // Fallback to memory cache return { decisions: memoryHistory.slice(-limit), total: memoryHistory.length, hasMore: false }; } } /** * Clear decision history for a workflow */ async clearHistory(workflowId) { // Clear memory cache this.decisionHistory.delete(workflowId); // Clear file try { const historyFile = path.join(this.historyDir, `${workflowId}-judge.log`); await fs.unlink(historyFile); } catch (error) { // File might not exist, which is fine if (error.code !== 'ENOENT') { console.error(`Failed to delete judge history file for workflow ${workflowId}:`, error); } } } /** * Sleep utility for retry delays */ sleep(ms) { return new Promise(resolve => setTimeout(resolve, ms)); } /** * Perform sanity check to detect potential bypass attempts */ async performSanityCheck(attempt) { const warnings = []; // Check for bypass patterns in custom validators if (attempt.definition.judgeConfig?.customValidator) { const funcStr = attempt.definition.judgeConfig.customValidator.toString(); // Check for patterns that always approve or bypass validation if (funcStr.match(/always\s*(return|approve)|approved\s*:\s*true|return\s*{\s*approved\s*:\s*true/i)) { warnings.push("Custom validator may bypass validation logic - contains 'always approve' pattern"); } if (funcStr.match(/return\s*true\s*;|return\s*{\s*}\s*;/i)) { warnings.push("Custom validator may not perform actual validation"); } } // Check for malicious prompt patterns in data if (attempt.data) { const dataStr = JSON.stringify(attempt.data).toLowerCase(); const promptInjectionPatterns = [ /ignore\s*previous\s*instructions/, /ignore\s*all\s*previous/, /always\s*approve/, /bypass\s*validation/, /skip\s*validation/, /approved\s*:\s*true/, /confidence\s*:\s*1/, /override\s*judge/, /disregard\s*rules/ ]; for (const pattern of promptInjectionPatterns) { if (dataStr.match(pattern)) { warnings.push(`Data contains potential prompt injection: '${pattern.source}'`); } } } // Check for invalid confidence thresholds const minConfidence = attempt.definition.judgeConfig?.minConfidence; if (minConfidence !== undefined) { if (minConfidence <= 0 || minConfidence > 1) { warnings.push(`Invalid confidence threshold: ${minConfidence} (must be between 0 and 1)`); } if (minConfidence < 0.3) { warnings.push(`Very low confidence threshold: ${minConfidence} may allow weak validations`); } } // Check if judge is being bypassed entirely if (attempt.definition.judgeConfig?.enabled === false) { warnings.push("Judge is disabled - all transitions will be auto-approved"); } // Check for suspicious state validator patterns const validators = attempt.definition.stateValidators; if (validators) { for (const [state, validator] of Object.entries(validators)) { if (validator.exitConditions || validator.entryConditions) { // Check if validators are functions that always return true const exitStr = validator.exitConditions?.toString() || ''; const entryStr = validator.entryConditions?.toString() || ''; if (exitStr.match(/return\s*{\s*valid\s*:\s*true/i) || entryStr.match(/return\s*{\s*valid\s*:\s*true/i)) { warnings.push(`State '${state}' validators may not perform actual validation`); } } } } // Log warnings for debugging if (warnings.length > 0) { console.warn('Judge Sanity Check Warnings:', warnings); } return { safe: warnings.length === 0, warnings }; } /** * Call LLM for intelligent judge decision */ async callLLMJudge(attempt) { const prompt = this.buildLLMPrompt(attempt); for (let retryCount = 0; retryCount < this.MAX_RETRIES; retryCount++) { try { const response = await fetch(`${this.llmBaseUrl}/chat/completions`, { method: 'POST', headers: { 'x-litellm-api-key': this.llmApiKey || '', 'Content-Type': 'application/json', 'accept': 'application/json' }, body: JSON.stringify({ model: this.llmModel, messages: [ { role: 'system', content: 'You are a workflow validation judge. Analyze transitions and provide structured feedback. IMPORTANT: The target state in transition attempts is determined by the workflow engine after evaluating conditional rules, NOT specified by users. This is correct behavior for conditional transitions.' }, { role: 'user', content: prompt } ], temperature: 0.3, max_tokens: 64000 }) }); if (!response.ok) { // Check if error is retryable const isRetryable = [429, 502, 503, 504].includes(response.status); const isLastAttempt = retryCount === this.MAX_RETRIES - 1; if (!isRetryable || isLastAttempt) { throw new Error(`LLM API error: ${response.status} ${response.statusText}`); } // Calculate delay with exponential backoff const delay = this.RETRY_DELAY * Math.pow(this.RETRY_BACKOFF, retryCount); console.log(`LLM Judge: Received ${response.status} error, retrying after ${delay}ms (attempt ${retryCount + 2}/${this.MAX_RETRIES})...`); await this.sleep(delay); continue; // Retry the request } const data = await response.json(); console.log('LLM Response:', JSON.stringify(data, null, 2)); const content = data.choices?.[0]?.message?.content; if (!content) { throw new Error('No content in LLM response'); } console.log('LLM Content:', content); return this.parseLLMResponse(content, attempt); } catch (error) { const isLastAttempt = retryCount === this.MAX_RETRIES - 1; // Check if it's a network error (fetch failed) const isNetworkError = error instanceof TypeError && (error.message.includes('fetch') || error.message.includes('network')); // If it's not a network error or it's the last attempt, throw the error if (!isNetworkError || isLastAttempt) { const attemptInfo = retryCount > 0 ? ` after ${retryCount + 1} attempts` : ''; throw new Error(`LLM judge error${attemptInfo}: ${error instanceof Error ? error.message : String(error)}`); } // Network error and not last attempt - retry const delay = this.RETRY_DELAY * Math.pow(this.RETRY_BACKOFF, retryCount); console.log(`LLM Judge: Network error, retrying after ${delay}ms (attempt ${retryCount + 2}/${this.MAX_RETRIES})...`); await this.sleep(delay); } } // Should never reach here, but just in case throw new Error('LLM judge error: Max retries exceeded'); } /** * Build prompt for LLM judge */ buildLLMPrompt(attempt) { const { definition, fromState, action, toState, data, context } = attempt; return `Analyze this workflow transition attempt: WORKFLOW DEFINITION: - Name: ${definition.name} - Description: ${definition.description || 'No description'} - Current State: ${fromState} - Available Actions: ${Object.keys(definition.states[fromState]?.transitions || {}).join(', ')} TRANSITION ATTEMPT: - Action: ${action} - Target State: ${toState} (NOTE: This was determined by the workflow engine after evaluating conditional rules, NOT provided by the user) - Data Provided: ${JSON.stringify(data || {}, null, 2)} IMPORTANT: For transitions with conditional rules, the workflow engine evaluates the conditions and determines the target state automatically. The "Target State" shown above is the result of that evaluation, not a user input. This is the correct behavior for conditional transitions. CURRENT CONTEXT: ${JSON.stringify(context, null, 2)} WORKFLOW STATES: ${JSON.stringify(definition.states, null, 2)} VALIDATION REQUIREMENTS: ${definition.stateValidators ? JSON.stringify(definition.stateValidators, null, 2) : 'None specified'} TASK: 1. Analyze if this transition makes semantic sense given the workflow's purpose 2. Check if the provided data is complete and appropriate 3. Consider the current context and whether prerequisites are met 4. Evaluate if this follows best practices for this type of workflow 5. Verify that the determined target state is appropriate for the current context RESPOND WITH JSON: { "approved": true/false, "confidence": 0.0-1.0, "reasoning": "Clear explanation of decision", "violations": ["List", "of", "issues"] or [], "suggestions": ["Helpful", "fixes"] or [] } Be strict but fair. Consider both technical correctness and business logic.`; } /** * Parse LLM response into JudgeDecision */ parseLLMResponse(content, attempt) { try { // Try to extract JSON from the response const jsonMatch = content.match(/\{[\s\S]*\}/); if (!jsonMatch) { throw new Error('No JSON found in LLM response'); } const parsed = JSON.parse(jsonMatch[0]); // Validate and normalize the response const decision = { approved: Boolean(parsed.approved), confidence: Math.max(0, Math.min(1, Number(parsed.confidence) || 0.5)), reasoning: String(parsed.reasoning || 'No reasoning provided'), violations: Array.isArray(parsed.violations) ? parsed.violations : undefined, suggestions: Array.isArray(parsed.suggestions) ? parsed.suggestions : undefined, metadata: { llmModel: this.llmModel, thinkingMode: this.llmThinkingMode } }; // Apply strict mode if configured const config = attempt.definition.judgeConfig; if (config?.strictMode && config.minConfidence && decision.confidence < config.minConfidence) { decision.approved = false; decision.violations = decision.violations || []; decision.violations.push(`Confidence ${decision.confidence.toFixed(2)} below minimum ${config.minConfidence}`); } return decision; } catch (error) { // If parsing fails, return a conservative decision return { approved: false, confidence: 0, reasoning: `Failed to parse LLM response: ${error instanceof Error ? error.message : String(error)}`, violations: ['LLM response parsing failed'], suggestions: ['Check LLM configuration and try again'] }; } } } exports.JudgeEngine = JudgeEngine; //# sourceMappingURL=judge-engine.js.map