dfa-mcp-server
DFA-based workflow MCP server for guiding LLM task completion
928 lines (912 loc) • 41 kB
JavaScript
;
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.JudgeEngine = void 0;
const fs = __importStar(require("fs/promises"));
const path = __importStar(require("path"));
const error_formatter_js_1 = require("./error-formatter.js");
class JudgeEngine {
decisionHistory = new Map();
llmBaseUrl;
llmApiKey;
llmModel;
llmThinkingMode;
historyDir;
// Retry configuration
MAX_RETRIES = 3;
RETRY_DELAY = 1000; // 1 second initial delay
RETRY_BACKOFF = 2; // Exponential backoff multiplier
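// With the defaults above, the wait before a retry is RETRY_DELAY * RETRY_BACKOFF^retryCount:
// 1000 ms after the first failed attempt, 2000 ms after the second; the third attempt is final.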
// History configuration
MEMORY_CACHE_SIZE = 20; // Keep last N entries in memory per workflow
constructor(workflowDir = '.workflows') {
// Load LLM configuration from environment
this.llmBaseUrl = process.env.LLM_BASE_URL;
this.llmApiKey = process.env.LLM_API_KEY;
this.llmModel = process.env.LLM_JUDGE_MODEL || 'gemini-2.5-pro';
this.llmThinkingMode = process.env.LLM_JUDGE_THINKING_MODE || 'high';
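// Example environment (the URL and key are placeholders; the last two values are also the
// fallbacks applied above when the variables are unset):
//   LLM_BASE_URL=https://litellm.example.com/v1
//   LLM_API_KEY=sk-...
//   LLM_JUDGE_MODEL=gemini-2.5-pro
//   LLM_JUDGE_THINKING_MODE=high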
// Set up history directory
this.historyDir = path.join(workflowDir, 'history');
}
/**
* Initialize the judge engine and create history directory
*/
async initialize() {
try {
await fs.mkdir(this.historyDir, { recursive: true });
}
catch (error) {
console.error('Failed to create history directory:', error);
}
}
/**
* Validate a transition attempt using the workflow's judge configuration
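*
* Usage sketch (the shape of `attempt` is inferred from how it is destructured in this file;
* literal values are illustrative only):
*   const decision = await judge.validateTransition({
*     workflowId: 'wf-123',
*     fromState: 'draft', action: 'submit', toState: 'review',
*     data: { reviewer: 'alice' }, context: { amount: 2500 },
*     definition // parsed workflow definition, including judgeConfig and validators
*   });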
*/
async validateTransition(attempt) {
const { definition } = attempt;
// If judge is not enabled, approve everything
if (!definition.judgeConfig?.enabled) {
return {
approved: true,
confidence: 1.0,
reasoning: 'Judge not enabled for this workflow'
};
}
const config = definition.judgeConfig;
// Perform sanity check first
const sanityCheck = await this.performSanityCheck(attempt);
if (!sanityCheck.safe) {
console.warn('Judge Sanity Check failed with warnings:', sanityCheck.warnings);
// Add warnings to the decision metadata
if (config.strictMode) {
// In strict mode, reject transitions that fail sanity check
return {
approved: false,
confidence: 0,
reasoning: 'Transition rejected due to potential validation bypass attempt',
violations: sanityCheck.warnings,
suggestions: ['Remove suspicious patterns from validators and data'],
metadata: {
sanityCheckFailed: true,
warnings: sanityCheck.warnings
}
};
}
}
// Use LLM judge if configured and available
if (config.useLLM && this.llmBaseUrl && this.llmApiKey) {
console.log('LLM Judge: Attempting to use LLM validation');
console.log(`LLM Config: URL=${this.llmBaseUrl}, Model=${this.llmModel}`);
try {
const decision = await this.callLLMJudge(attempt);
console.log('LLM Judge: Successfully received decision');
// Add sanity check warnings to decision if any
if (sanityCheck.warnings.length > 0) {
decision.metadata = {
...decision.metadata,
sanityCheckWarnings: sanityCheck.warnings
};
}
return decision;
}
catch (error) {
console.error('LLM judge failed, falling back to structural validation:', error);
// Fall through to structural validation
}
}
else {
console.log(`LLM Judge: Not using LLM (useLLM=${config.useLLM}, hasUrl=${!!this.llmBaseUrl}, hasKey=${!!this.llmApiKey})`);
}
const violations = [];
const suggestions = [];
let totalConfidence = 0;
let validationCount = 0;
// 1. Validate structural correctness (40% weight)
const structuralResult = this.validateStructure(attempt);
totalConfidence += structuralResult.confidence * 0.4;
validationCount += 0.4;
if (!structuralResult.valid) {
violations.push(`Structural: ${structuralResult.reason}`);
}
// 2. Validate exit conditions of current state (15% weight)
const exitResult = this.validateStateExit(attempt);
if (exitResult) {
totalConfidence += exitResult.confidence * 0.15;
validationCount += 0.15;
if (!exitResult.valid) {
violations.push(`Exit condition: ${exitResult.reason}`);
suggestions.push('Ensure all exit conditions are met before transitioning');
}
}
// 3. Validate entry conditions of next state (15% weight)
const entryResult = this.validateStateEntry(attempt);
if (entryResult) {
totalConfidence += entryResult.confidence * 0.15;
validationCount += 0.15;
if (!entryResult.valid) {
violations.push(`Entry condition: ${entryResult.reason}`);
suggestions.push('Check that the target state prerequisites are satisfied');
}
}
// 4. Validate transition-specific rules (20% weight)
const transitionResult = this.validateTransitionRules(attempt);
if (transitionResult) {
totalConfidence += transitionResult.confidence * 0.2;
validationCount += 0.2;
if (!transitionResult.valid) {
violations.push(`Transition rule: ${transitionResult.reason}`);
}
}
// 5. Validate data completeness (10% weight)
const dataResult = this.validateDataCompleteness(attempt);
totalConfidence += dataResult.confidence * 0.1;
validationCount += 0.1;
if (!dataResult.valid) {
violations.push(`Data: ${dataResult.reason}`);
suggestions.push('Provide all required data fields for this action');
}
// 6. Run custom validation rules
if (config.validationRules) {
for (const rule of config.validationRules) {
const result = rule.validate(attempt);
if (!result.valid) {
violations.push(`${rule.name}: ${result.reason}`);
}
}
}
// 7. Run custom validator if provided
if (config.customValidator) {
return config.customValidator(attempt);
}
// Calculate final confidence
// Note: totalConfidence already has weighted values, no need to divide
const confidence = totalConfidence;
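// Worked example: if all five checks run and each reports confidence 1.0,
// confidence = 1.0*0.4 + 1.0*0.15 + 1.0*0.15 + 1.0*0.2 + 1.0*0.1 = 1.0.
// Optional exit/entry/transition validators that return null contribute nothing,
// so with only the structural and data checks the ceiling is 0.4 + 0.1 = 0.5;
// validationCount records the applied weight but is not used to normalize.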
// Determine approval based on confidence and strict mode
let approved = violations.length === 0;
if (config.strictMode && config.minConfidence) {
approved = approved && confidence >= config.minConfidence;
if (confidence < config.minConfidence) {
violations.push(`Confidence ${confidence.toFixed(2)} below minimum ${config.minConfidence}`);
}
}
// Generate reasoning
const reasoning = this.generateReasoning(attempt, violations, confidence);
// Generate suggestions if not approved
if (!approved && suggestions.length === 0) {
suggestions.push(...this.generateSuggestions(attempt, violations));
}
const decision = {
approved,
confidence,
reasoning,
violations: violations.length > 0 ? violations : undefined,
suggestions: suggestions.length > 0 ? suggestions : undefined,
metadata: sanityCheck.warnings.length > 0 ? {
sanityCheckWarnings: sanityCheck.warnings
} : undefined
};
// Store decision in history
await this.recordDecision(attempt.workflowId, decision);
return decision;
}
/**
* Evaluate transition conditions using LLM to determine which rule matches
*/
async evaluateTransitionConditions(rules, context, attempt) {
// If LLM is not available, fall back to simple evaluation
if (!this.llmBaseUrl || !this.llmApiKey) {
return this.evaluateConditionsSimple(rules, context);
}
const prompt = this.buildConditionEvaluationPrompt(rules, context, attempt);
try {
const response = await fetch(`${this.llmBaseUrl}/chat/completions`, {
method: 'POST',
headers: {
'x-litellm-api-key': this.llmApiKey,
'Content-Type': 'application/json',
'accept': 'application/json'
},
body: JSON.stringify({
model: this.llmModel,
messages: [
{
role: 'system',
content: 'You are a workflow condition evaluator. Analyze conditions and determine which ones are true based on the context.'
},
{
role: 'user',
content: prompt
}
],
temperature: 0.2, // Low temperature for consistent evaluation
max_tokens: 4000
})
});
if (!response.ok) {
throw new Error(`LLM API error: ${response.status} ${response.statusText}`);
}
const data = await response.json();
const content = data.choices?.[0]?.message?.content;
if (!content) {
throw new Error('No content in LLM response');
}
return this.parseConditionEvaluationResponse(content, rules);
}
catch (error) {
console.error('LLM condition evaluation failed, falling back to simple evaluation:', error);
return this.evaluateConditionsSimple(rules, context);
}
}
/**
* Build prompt for LLM condition evaluation
*/
buildConditionEvaluationPrompt(rules, context, attempt) {
return `Evaluate these code-like conditions against the provided context.
Use your reasoning to understand what each condition is checking for.
Current State: ${attempt.fromState}
Action: ${attempt.action}
Workflow: ${attempt.definition.name}
Context Data:
${JSON.stringify(context, null, 2)}
Conditions to evaluate:
${rules.map((r, i) => `${i + 1}. "${r.condition}"${r.description ? ` // ${r.description}` : ''} -> target: "${r.target}"`).join('\n')}
For each condition:
1. Parse what properties and values it's checking
2. Find or intelligently derive those values from the context
3. Apply the logical operators (>, <, >=, <=, ===, !==, &&, ||)
4. Determine if the condition is TRUE or FALSE
5. Provide your confidence (0.0-1.0) and reasoning
Example evaluation:
Condition: "context.amount > 1000 && context.priority === 'high'"
- context.amount: Looking at the context, amount is 2500
- context.priority: The priority field shows 'high'
- Evaluation: 2500 > 1000 && 'high' === 'high' = TRUE && TRUE = TRUE
- Confidence: 1.0 (values are explicitly in context)
If a property is not explicitly in the context but can be inferred, explain your reasoning.
For example, if checking "context.isUrgent" but only seeing deadlineDate, you might infer urgency from how close the deadline is.
RESPOND WITH JSON:
{
"evaluations": [
{
"index": 0,
"condition": "the condition string",
"result": true/false,
"confidence": 0.0-1.0,
"reasoning": "Step by step explanation",
"extractedValues": {
"property.path": "extracted or inferred value"
}
}
],
"recommendedIndex": 0 or null if none match,
"overallReasoning": "Summary of the evaluation process"
}`;
}
/**
* Parse LLM response for condition evaluation
*/
parseConditionEvaluationResponse(content, rules) {
try {
const jsonMatch = content.match(/\{[\s\S]*\}/);
if (!jsonMatch) {
throw new Error('No JSON found in LLM response');
}
const parsed = JSON.parse(jsonMatch[0]);
const evaluations = parsed.evaluations.map((e) => ({
condition: e.condition || rules[e.index]?.condition || '',
result: Boolean(e.result),
confidence: Math.max(0, Math.min(1, Number(e.confidence) || 0.5)),
reasoning: String(e.reasoning || 'No reasoning provided'),
extractedValues: e.extractedValues || {}
}));
// Find the first true condition with high confidence
let matchedRule = null;
const recommendedIndex = parsed.recommendedIndex;
if (recommendedIndex !== null && recommendedIndex >= 0 && recommendedIndex < rules.length) {
if (evaluations[recommendedIndex]?.result) {
matchedRule = rules[recommendedIndex];
}
}
else {
// Find first matching condition
for (let i = 0; i < evaluations.length; i++) {
if (evaluations[i].result && evaluations[i].confidence >= 0.7) {
matchedRule = rules[i];
break;
}
}
}
return {
matchedRule,
evaluations,
overallReasoning: String(parsed.overallReasoning || 'Conditions evaluated using LLM reasoning')
};
}
catch (error) {
// If parsing fails, return no match
return {
matchedRule: null,
evaluations: rules.map(r => ({
condition: r.condition,
result: false,
confidence: 0,
reasoning: `Failed to parse LLM response: ${error instanceof Error ? error.message : String(error)}`,
extractedValues: {}
})),
overallReasoning: 'LLM response parsing failed'
};
}
}
/**
* Simple fallback evaluation for conditions (when LLM is not available)
*/
evaluateConditionsSimple(rules, context) {
const evaluations = rules.map(rule => {
// Simple evaluation: only handle "true" condition
const result = rule.condition.toLowerCase() === 'true';
return {
condition: rule.condition,
result,
confidence: result ? 1.0 : 0.0,
reasoning: result ? 'Always true condition' : 'Condition evaluation requires LLM',
extractedValues: {}
};
});
// Find first true condition
const matchedIndex = evaluations.findIndex(e => e.result);
const matchedRule = matchedIndex >= 0 ? rules[matchedIndex] : null;
return {
matchedRule,
evaluations,
overallReasoning: 'Simple evaluation without LLM - only "true" conditions supported'
};
}
/**
* Validate structural correctness of the transition
*/
validateStructure(attempt) {
const { fromState, action, toState, definition } = attempt;
const currentStateDef = definition.states[fromState];
if (!currentStateDef) {
return {
valid: false,
confidence: 0,
reason: `Current state '${fromState}' not found in workflow definition`
};
}
if (currentStateDef.final) {
return {
valid: false,
confidence: 0,
reason: `Cannot transition from final state '${fromState}'`
};
}
const transitionRules = currentStateDef.transitions?.[action];
if (!transitionRules || transitionRules.length === 0) {
return {
valid: false,
confidence: 0,
reason: `Action '${action}' not valid for state '${fromState}'`
};
}
// Check if the target state is valid in any of the rules
const validTargets = transitionRules.map(rule => rule.target);
if (!validTargets.includes(toState)) {
return {
valid: false,
confidence: 0,
reason: `State '${toState}' is not a valid target for action '${action}' from state '${fromState}'. Valid targets: ${validTargets.join(', ')}`
};
}
return { valid: true, confidence: 1.0 };
}
/**
* Validate exit conditions of current state
*/
validateStateExit(attempt) {
const { fromState, context, definition } = attempt;
const validator = definition.stateValidators?.[fromState];
if (!validator?.exitConditions) {
return null;
}
return validator.exitConditions(context);
}
/**
* Validate entry conditions of target state
*/
validateStateEntry(attempt) {
const { toState, context, definition } = attempt;
const validator = definition.stateValidators?.[toState];
if (!validator?.entryConditions) {
return null;
}
// Create projected context after transition
const projectedContext = { ...context, ...attempt.data };
return validator.entryConditions(projectedContext);
}
/**
* Validate transition-specific rules
*/
validateTransitionRules(attempt) {
const { action, data, context, definition } = attempt;
const validator = definition.transitionValidators?.[action];
if (!validator) {
return null;
}
return validator(data, context);
}
/**
* Validate data completeness and required fields
*/
validateDataCompleteness(attempt) {
const { toState, context, data, definition } = attempt;
const validator = definition.stateValidators?.[toState];
if (!validator?.requiredFields || validator.requiredFields.length === 0) {
return { valid: true, confidence: 1.0 };
}
// Check if required fields will be present after transition
const projectedContext = { ...context, ...data };
const missingFields = validator.requiredFields.filter(field => !this.hasNestedProperty(projectedContext, field));
if (missingFields.length > 0) {
return {
valid: false,
confidence: 0.5,
reason: `Missing required fields for state '${toState}': ${missingFields.join(', ')}`
};
}
return { valid: true, confidence: 1.0 };
}
/**
* Check if an object has a nested property (supports dot notation)
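* Example: hasNestedProperty({ a: { b: 1 } }, 'a.b') is true; 'a.c' is false.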
*/
hasNestedProperty(obj, path) {
const parts = path.split('.');
let current = obj;
for (const part of parts) {
if (current == null || typeof current !== 'object' || !(part in current)) {
return false;
}
current = current[part];
}
return current !== undefined;
}
/**
* Generate human-readable reasoning for the decision
*/
generateReasoning(attempt, violations, confidence) {
const { fromState, action, toState, definition } = attempt;
if (violations.length === 0) {
return `${error_formatter_js_1.ErrorFormatter.formatTransitionPath(fromState, action, toState, true)} validated successfully with ${(confidence * 100).toFixed(0)}% confidence`;
}
// Check if it's an invalid action error
const invalidActionViolation = violations.find(v => v.includes("not valid for state"));
if (invalidActionViolation) {
const validActions = Object.keys(definition.states[fromState]?.transitions || {});
return error_formatter_js_1.ErrorFormatter.formatInvalidActionError(fromState, action, validActions, definition.name);
}
return `${error_formatter_js_1.ErrorFormatter.formatTransitionPath(fromState, action, toState, false)} rejected:\n${violations.map((v, i) => ` ${i + 1}. ${v}`).join('\n')}`;
}
/**
* Generate helpful suggestions based on violations
*/
generateSuggestions(attempt, violations) {
const suggestions = [];
const { fromState, action, toState, data, context, definition } = attempt;
for (const violation of violations) {
if (violation.includes('Missing required fields')) {
// Extract missing fields from violation message
const match = violation.match(/Missing required fields[^:]*: (.+)/);
if (match) {
const missingFields = match[1].split(',').map(f => f.trim());
const errorMsg = error_formatter_js_1.ErrorFormatter.formatMissingFieldsError(missingFields, context, toState, action);
suggestions.push(errorMsg);
}
else {
suggestions.push('Include all required fields in the transition data');
}
}
else if (violation.includes('not valid for state')) {
const validActions = Object.keys(definition.states[fromState]?.transitions || {});
if (validActions.length > 0) {
suggestions.push(`Valid actions for state '${fromState}':\n${validActions.map(a => ` • ${a}`).join('\n')}`);
// Add example usage
suggestions.push(`Example: workflow.advance({ id: "...", action: "${validActions[0]}", data: { ... } })`);
}
else {
suggestions.push(`State '${fromState}' has no available transitions (might be a final state)`);
}
}
else if (violation.includes('Exit condition')) {
suggestions.push(`Exit condition failed for state '${fromState}':\n - Review the current context to ensure all exit requirements are met\n - Check if any required processing is complete before transitioning`);
// Show current context summary
const contextKeys = Object.keys(context);
if (contextKeys.length > 0) {
suggestions.push(`Current context has: ${contextKeys.slice(0, 5).join(', ')}${contextKeys.length > 5 ? ' ...' : ''}`);
}
}
else if (violation.includes('Entry condition')) {
suggestions.push(`Entry condition failed for state '${toState}':\n - Ensure all prerequisites are satisfied\n - Check if required data is included in the transition`);
// Show what data was provided
if (data && Object.keys(data).length > 0) {
suggestions.push(`Provided data: ${Object.keys(data).join(', ')}`);
}
else {
suggestions.push('No data was provided with this transition');
}
}
else if (violation.includes('Confidence') && violation.includes('below minimum')) {
const match = violation.match(/Confidence ([\d.]+) below minimum ([\d.]+)/);
if (match) {
suggestions.push(`Confidence too low (${match[1]} < ${match[2]}):\n - Provide more complete data\n - Ensure the transition makes logical sense\n - Check for any validation warnings`);
}
}
}
// If no specific suggestions were generated, provide general guidance
if (suggestions.length === 0 && violations.length > 0) {
suggestions.push('Review the workflow definition and ensure your transition meets all requirements');
suggestions.push(`Current state '${fromState}' expects specific conditions to transition to '${toState}'`);
}
return suggestions;
}
/**
* Record decision in history
*/
async recordDecision(workflowId, decision) {
const timestampedDecision = {
...decision,
metadata: {
...decision.metadata,
timestamp: new Date().toISOString()
}
};
// Keep in-memory cache for quick access
const memoryHistory = this.decisionHistory.get(workflowId) || [];
memoryHistory.push(timestampedDecision);
// Keep only the last N entries in memory
if (memoryHistory.length > this.MEMORY_CACHE_SIZE) {
memoryHistory.splice(0, memoryHistory.length - this.MEMORY_CACHE_SIZE);
}
this.decisionHistory.set(workflowId, memoryHistory);
// Append to file
try {
const historyFile = path.join(this.historyDir, `${workflowId}-judge.log`);
const logLine = JSON.stringify(timestampedDecision) + '\n';
await fs.appendFile(historyFile, logLine);
}
catch (error) {
console.error(`Failed to write judge decision to file for workflow ${workflowId}:`, error);
}
}
/**
* Get decision history for a workflow with pagination
*/
async getDecisionHistory(workflowId, limit = 20, offset = 0) {
// First check memory cache for recent entries
const memoryHistory = this.decisionHistory.get(workflowId) || [];
try {
const historyFile = path.join(this.historyDir, `${workflowId}-judge.log`);
// Check if file exists
try {
await fs.access(historyFile);
}
catch {
// File doesn't exist, return empty
return { decisions: [], total: 0, hasMore: false };
}
// Read file line by line
const fileContent = await fs.readFile(historyFile, 'utf-8');
const lines = fileContent.trim().split('\n').filter(line => line);
const totalCount = lines.length;
// Apply pagination
const startIndex = Math.max(0, totalCount - offset - limit);
const endIndex = totalCount - offset;
const selectedLines = lines.slice(startIndex, endIndex).reverse(); // Most recent first
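// Example: with 100 logged decisions, limit 20 and offset 0 gives startIndex 80 and
// endIndex 100, so the 20 newest lines are returned (newest first) and hasMore is true.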
const decisions = [];
for (const line of selectedLines) {
try {
decisions.push(JSON.parse(line));
}
catch (error) {
console.warn(`Failed to parse judge history line: ${line}`);
}
}
return {
decisions,
total: totalCount,
hasMore: startIndex > 0
};
}
catch (error) {
console.error(`Failed to read judge history for workflow ${workflowId}:`, error);
// Fallback to memory cache
return {
decisions: memoryHistory.slice(-limit),
total: memoryHistory.length,
hasMore: false
};
}
}
/**
* Clear decision history for a workflow
*/
async clearHistory(workflowId) {
// Clear memory cache
this.decisionHistory.delete(workflowId);
// Clear file
try {
const historyFile = path.join(this.historyDir, `${workflowId}-judge.log`);
await fs.unlink(historyFile);
}
catch (error) {
// File might not exist, which is fine
if (error.code !== 'ENOENT') {
console.error(`Failed to delete judge history file for workflow ${workflowId}:`, error);
}
}
}
/**
* Sleep utility for retry delays
*/
sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
/**
* Perform sanity check to detect potential bypass attempts
*/
async performSanityCheck(attempt) {
const warnings = [];
// Check for bypass patterns in custom validators
if (attempt.definition.judgeConfig?.customValidator) {
const funcStr = attempt.definition.judgeConfig.customValidator.toString();
// Check for patterns that always approve or bypass validation
if (funcStr.match(/always\s*(return|approve)|approved\s*:\s*true|return\s*{\s*approved\s*:\s*true/i)) {
warnings.push("Custom validator may bypass validation logic - contains 'always approve' pattern");
}
if (funcStr.match(/return\s*true\s*;|return\s*{\s*}\s*;/i)) {
warnings.push("Custom validator may not perform actual validation");
}
}
// Check for malicious prompt patterns in data
if (attempt.data) {
const dataStr = JSON.stringify(attempt.data).toLowerCase();
const promptInjectionPatterns = [
/ignore\s*previous\s*instructions/,
/ignore\s*all\s*previous/,
/always\s*approve/,
/bypass\s*validation/,
/skip\s*validation/,
/approved\s*:\s*true/,
/confidence\s*:\s*1/,
/override\s*judge/,
/disregard\s*rules/
];
for (const pattern of promptInjectionPatterns) {
if (dataStr.match(pattern)) {
warnings.push(`Data contains potential prompt injection: '${pattern.source}'`);
}
}
}
// Check for invalid confidence thresholds
const minConfidence = attempt.definition.judgeConfig?.minConfidence;
if (minConfidence !== undefined) {
if (minConfidence <= 0 || minConfidence > 1) {
warnings.push(`Invalid confidence threshold: ${minConfidence} (must be between 0 and 1)`);
}
if (minConfidence < 0.3) {
warnings.push(`Very low confidence threshold: ${minConfidence} may allow weak validations`);
}
}
// Check if judge is being bypassed entirely
if (attempt.definition.judgeConfig?.enabled === false) {
warnings.push("Judge is disabled - all transitions will be auto-approved");
}
// Check for suspicious state validator patterns
const validators = attempt.definition.stateValidators;
if (validators) {
for (const [state, validator] of Object.entries(validators)) {
if (validator.exitConditions || validator.entryConditions) {
// Check if validators are functions that always return true
const exitStr = validator.exitConditions?.toString() || '';
const entryStr = validator.entryConditions?.toString() || '';
if (exitStr.match(/return\s*{\s*valid\s*:\s*true/i) ||
entryStr.match(/return\s*{\s*valid\s*:\s*true/i)) {
warnings.push(`State '${state}' validators may not perform actual validation`);
}
}
}
}
// Log warnings for debugging
if (warnings.length > 0) {
console.warn('Judge Sanity Check Warnings:', warnings);
}
return {
safe: warnings.length === 0,
warnings
};
}
/**
* Call LLM for intelligent judge decision
*/
async callLLMJudge(attempt) {
const prompt = this.buildLLMPrompt(attempt);
for (let retryCount = 0; retryCount < this.MAX_RETRIES; retryCount++) {
try {
const response = await fetch(`${this.llmBaseUrl}/chat/completions`, {
method: 'POST',
headers: {
'x-litellm-api-key': this.llmApiKey || '',
'Content-Type': 'application/json',
'accept': 'application/json'
},
body: JSON.stringify({
model: this.llmModel,
messages: [
{
role: 'system',
content: 'You are a workflow validation judge. Analyze transitions and provide structured feedback. IMPORTANT: The target state in transition attempts is determined by the workflow engine after evaluating conditional rules, NOT specified by users. This is correct behavior for conditional transitions.'
},
{
role: 'user',
content: prompt
}
],
temperature: 0.3,
max_tokens: 64000
})
});
if (!response.ok) {
// Check if error is retryable
const isRetryable = [429, 502, 503, 504].includes(response.status);
const isLastAttempt = retryCount === this.MAX_RETRIES - 1;
if (!isRetryable || isLastAttempt) {
throw new Error(`LLM API error: ${response.status} ${response.statusText}`);
}
// Calculate delay with exponential backoff
const delay = this.RETRY_DELAY * Math.pow(this.RETRY_BACKOFF, retryCount);
console.log(`LLM Judge: Received ${response.status} error, retrying after ${delay}ms (attempt ${retryCount + 2}/${this.MAX_RETRIES})...`);
await this.sleep(delay);
continue; // Retry the request
}
const data = await response.json();
console.log('LLM Response:', JSON.stringify(data, null, 2));
const content = data.choices?.[0]?.message?.content;
if (!content) {
throw new Error('No content in LLM response');
}
console.log('LLM Content:', content);
return this.parseLLMResponse(content, attempt);
}
catch (error) {
const isLastAttempt = retryCount === this.MAX_RETRIES - 1;
// Check if it's a network error (fetch failed)
const isNetworkError = error instanceof TypeError &&
(error.message.includes('fetch') || error.message.includes('network'));
// If it's not a network error or it's the last attempt, throw the error
if (!isNetworkError || isLastAttempt) {
const attemptInfo = retryCount > 0 ? ` after ${retryCount + 1} attempts` : '';
throw new Error(`LLM judge error${attemptInfo}: ${error instanceof Error ? error.message : String(error)}`);
}
// Network error and not last attempt - retry
const delay = this.RETRY_DELAY * Math.pow(this.RETRY_BACKOFF, retryCount);
console.log(`LLM Judge: Network error, retrying after ${delay}ms (attempt ${retryCount + 2}/${this.MAX_RETRIES})...`);
await this.sleep(delay);
}
}
// Should never reach here, but just in case
throw new Error('LLM judge error: Max retries exceeded');
}
/**
* Build prompt for LLM judge
*/
buildLLMPrompt(attempt) {
const { definition, fromState, action, toState, data, context } = attempt;
return `Analyze this workflow transition attempt:
WORKFLOW DEFINITION:
- Name: ${definition.name}
- Description: ${definition.description || 'No description'}
- Current State: ${fromState}
- Available Actions: ${Object.keys(definition.states[fromState]?.transitions || {}).join(', ')}
TRANSITION ATTEMPT:
- Action: ${action}
- Target State: ${toState} (NOTE: This was determined by the workflow engine after evaluating conditional rules, NOT provided by the user)
- Data Provided: ${JSON.stringify(data || {}, null, 2)}
IMPORTANT: For transitions with conditional rules, the workflow engine evaluates the conditions and determines the target state automatically. The "Target State" shown above is the result of that evaluation, not a user input. This is the correct behavior for conditional transitions.
CURRENT CONTEXT:
${JSON.stringify(context, null, 2)}
WORKFLOW STATES:
${JSON.stringify(definition.states, null, 2)}
VALIDATION REQUIREMENTS:
${definition.stateValidators ? JSON.stringify(definition.stateValidators, null, 2) : 'None specified'}
TASK:
1. Analyze if this transition makes semantic sense given the workflow's purpose
2. Check if the provided data is complete and appropriate
3. Consider the current context and whether prerequisites are met
4. Evaluate if this follows best practices for this type of workflow
5. Verify that the determined target state is appropriate for the current context
RESPOND WITH JSON:
{
"approved": true/false,
"confidence": 0.0-1.0,
"reasoning": "Clear explanation of decision",
"violations": ["List", "of", "issues"] or [],
"suggestions": ["Helpful", "fixes"] or []
}
Be strict but fair. Consider both technical correctness and business logic.`;
}
/**
* Parse LLM response into JudgeDecision
*/
parseLLMResponse(content, attempt) {
try {
// Try to extract JSON from the response
const jsonMatch = content.match(/\{[\s\S]*\}/);
if (!jsonMatch) {
throw new Error('No JSON found in LLM response');
}
const parsed = JSON.parse(jsonMatch[0]);
// Validate and normalize the response
const decision = {
approved: Boolean(parsed.approved),
confidence: Math.max(0, Math.min(1, Number(parsed.confidence) || 0.5)),
reasoning: String(parsed.reasoning || 'No reasoning provided'),
violations: Array.isArray(parsed.violations) ? parsed.violations : undefined,
suggestions: Array.isArray(parsed.suggestions) ? parsed.suggestions : undefined,
metadata: {
llmModel: this.llmModel,
thinkingMode: this.llmThinkingMode
}
};
// Apply strict mode if configured
const config = attempt.definition.judgeConfig;
if (config?.strictMode && config.minConfidence && decision.confidence < config.minConfidence) {
decision.approved = false;
decision.violations = decision.violations || [];
decision.violations.push(`Confidence ${decision.confidence.toFixed(2)} below minimum ${config.minConfidence}`);
}
return decision;
}
catch (error) {
// If parsing fails, return a conservative decision
return {
approved: false,
confidence: 0,
reasoning: `Failed to parse LLM response: ${error instanceof Error ? error.message : String(error)}`,
violations: ['LLM response parsing failed'],
suggestions: ['Check LLM configuration and try again']
};
}
}
}
exports.JudgeEngine = JudgeEngine;
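// Usage sketch (the require path and top-level await context are assumptions for illustration):
//   const { JudgeEngine } = require('./judge-engine.js');
//   const judge = new JudgeEngine('.workflows');  // '.workflows' is also the constructor default
//   await judge.initialize();                     // creates .workflows/history
//   const decision = await judge.validateTransition(attempt);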
//# sourceMappingURL=judge-engine.js.map