cmte
Version:
Design by Committee™ except it's just you and LLMs
491 lines (439 loc) • 24.8 kB
JavaScript
/**
* SetExecutor - Executes a set of tasks in parallel
*/
import path from 'path';
import { logger } from '../../utils/logger.js';
import fs from 'fs/promises';
import chalk from 'chalk'; // Import chalk
import getClaudeClient from '../llm/claude-adapter.js';
import getLocalLLMClient from '../llm/local-llm-adapter.js';
import { OutputReferenceResolver } from '../task/output-reference-resolver.js';
import { FileCollectionManager } from '../file-collection-manager.js'; // Import FileCollectionManager
import { TemplateRenderer } from '../template-renderer.js'; // CORRECTED Import Path
import { BaseLLMClient } from '../llm/base-llm-client.js';
import { ComponentRegistry as Registry } from '../components/registry.js'; // Corrected import
import { toXML as convertMarkdownToXML } from '../../utils/llmxml.js'; // Import our wrapper
// Import WorkflowExecutor for type checking (optional but good practice)
import { WorkflowExecutor } from './workflow-executor.js';
// Import the new utility function
import { getNestedProperty } from '../../utils/nested-property.js';
/**
 * Recursively replaces every promise found inside a value with the value it
 * resolves to.
 *
 * - Promises and other thenables are awaited, and the resolved value is then
 *   walked as well (so a promise of an object containing promises is fully
 *   resolved).
 * - Arrays and plain objects (prototype `Object.prototype` or `null`) are
 *   rebuilt as new containers; the input is never mutated.
 * - Everything else (primitives, functions, and non-plain objects such as
 *   Date, Map, Error or class instances) is returned as-is. Copying those
 *   key-by-key would strip their prototype and internal state.
 *
 * Sibling entries are awaited together via `Promise.all`, so a rejection in
 * one entry does not leave rejections of the other entries unhandled.
 * Cyclic structures are not supported.
 *
 * @param {*} obj - Any value, possibly containing nested promises.
 * @returns {Promise<*>} The same structure with all promises resolved.
 */
async function resolvePromisesInObject(obj) {
  if (!obj || typeof obj !== 'object') {
    return obj;
  }

  // Native promises and foreign thenables alike.
  if (typeof obj.then === 'function') {
    return resolvePromisesInObject(await obj);
  }

  if (Array.isArray(obj)) {
    return Promise.all(obj.map((entry) => resolvePromisesInObject(entry)));
  }

  // Only plain objects are containers we are allowed to rebuild.
  const proto = Object.getPrototypeOf(obj);
  if (proto !== null && proto !== Object.prototype) {
    return obj;
  }

  const keys = Object.keys(obj);
  const values = await Promise.all(keys.map((key) => resolvePromisesInObject(obj[key])));
  // fromEntries defines own data properties, so a literal "__proto__" key is
  // copied as data instead of rewiring the result's prototype.
  return Object.fromEntries(keys.map((key, position) => [key, values[position]]));
}
/**
 * SetExecutor - executes the tasks of a single workflow set in parallel.
 *
 * A set can run once (`executeOnce`) or once per item of a collection
 * (`executeForEach`). For each task the prompt template is rendered, converted
 * to LLM-XML and sent to the LLM client taken from the parent workflow
 * executor; in mock or dry-run mode a placeholder response is produced instead.
 */
export class SetExecutor {
  /**
   * @param {object} setDefinition - Set definition. This class reads `name`, `tasks`,
   *   `variables`, `requiredInput` and `requiredOutput` from it.
   * @param {Registry} registry - Registry used to load task definitions (`loadTask`).
   * @param {WorkflowExecutor} workflowExecutor - The parent workflow executor instance;
   *   shared components (file collection manager, output reference resolver, LLM client)
   *   and run flags (dryRun, savePrompts, ...) are copied from it.
   * @param {object} [initialContext={}] - Initial context for the set.
   * @param {number} [setIndex=-1] - Index of the set, forwarded to `recordTaskOutput`.
   * @param {object} [sharedComponents={}] - Accepted for call compatibility; not read here.
   * @throws {Error} If `setDefinition`, `registry` or `workflowExecutor` is missing.
   */
  constructor(setDefinition, registry, workflowExecutor, initialContext = {}, setIndex = -1, sharedComponents = {}) {
    if (!setDefinition) throw new Error('Set definition is required');
    if (!registry) throw new Error('Registry is required');
    if (!workflowExecutor) throw new Error('WorkflowExecutor instance is required');

    this.setDefinition = setDefinition;
    this.registry = registry;
    this.workflowExecutor = workflowExecutor;
    this.initialContext = initialContext;
    this.setIndex = setIndex;

    // Shared components come from the workflow executor so every set uses the same instances.
    this.fileCollectionManager = workflowExecutor.fileCollectionManager;
    this.outputReferenceResolver = workflowExecutor.outputReferenceResolver;

    // One renderer per SetExecutor, wired to the shared manager/resolver.
    this.renderer = new TemplateRenderer(
      this.fileCollectionManager,
      this.outputReferenceResolver,
      this.workflowExecutor
    );

    this.savePrompts = workflowExecutor.savePrompts;
    this.dryRun = workflowExecutor.dryRun;
    this.apiDryRun = workflowExecutor.apiDryRun;
    this.lite = workflowExecutor.lite;
    this.useLocalLLM = workflowExecutor.useLocalLLM;
    this.llmClient = workflowExecutor.llmClient;
    this.mockTaskExecution = workflowExecutor.mockTaskExecution;
    this.modelConfig = workflowExecutor.modelConfig;
    this.workflowPath = workflowExecutor.workflowPath;
    this.outputPath = workflowExecutor.rawOutputPath;
    this.isMultiRun = workflowExecutor.isMultiRun;

    // Base name only; per-iteration names are derived in execute().
    this.baseSetName = setDefinition.name || 'UnnamedSet';

    logger.debug('SetExecutor initialized', {
      baseSetName: this.baseSetName,
      dryRun: this.dryRun
    });
  }

  /**
   * Executes the set once without iteration.
   * @param {Object} initialContext - The initial context for this specific run.
   * @returns {Promise<{ outputs: object }>} Object containing the final outputs map.
   */
  async executeOnce(initialContext = {}) {
    logger.debug(`SetExecutor.executeOnce starting for set: ${this.setDefinition.name} with initial context keys: ${Object.keys(initialContext).join(', ')}`);
    return this.execute(initialContext, null); // null iterationKey => plain (non-iterated) run
  }

  /**
   * Executes the set for each item in a collection in parallel.
   *
   * Each item is normalised to `{ key, value }`. Items that already own both a
   * `key` and a `value` property are used as-is; otherwise the key is derived
   * from `id`, then `name`, then a sanitised `path`, falling back to the
   * item's index.
   *
   * Outputs of all iterations are merged into one flat map in input order.
   * Because task outputs are keyed by task name, iterations that run the same
   * tasks overwrite each other there and the last iteration wins.
   *
   * @param {Array<object>} itemsToIterate - Array of items (e.g., {key, value} pairs or file objects).
   * @returns {Promise<{ outputs: object }>} Object containing the outputs map aggregated across iterations.
   * @throws {TypeError} If `itemsToIterate` is null or undefined.
   */
  async executeForEach(itemsToIterate) {
    if (itemsToIterate === null || itemsToIterate === undefined) {
      throw new TypeError('executeForEach requires a collection of items to iterate');
    }

    // Safe own-property test; also works for objects without a prototype.
    const owns = (target, property) => Object.prototype.hasOwnProperty.call(target, property);

    const runIteration = async (iterKey, iterContext, iterItem) => {
      logger.debug(`Starting execution promise for iteration key: ${iterKey}`, { baseSetName: this.baseSetName });
      const result = await this.executeIteration(iterContext, iterItem, iterKey);
      logger.debug(`Finished execution promise for iteration key: ${iterKey}`, { baseSetName: this.baseSetName });
      return result;
    };

    const iterationPromises = [];
    for (const [index, loopItem] of itemsToIterate.entries()) {
      const isObjectItem = typeof loopItem === 'object' && loopItem !== null;
      let iterationKey;
      let currentItemForContext;

      if (isObjectItem && owns(loopItem, 'key') && owns(loopItem, 'value')) {
        // Already a { key, value } pair (object iteration).
        iterationKey = String(loopItem.key);
        currentItemForContext = { key: loopItem.key, value: loopItem.value };
      } else {
        // Array iteration: derive a key from the element, otherwise use its index.
        if (isObjectItem && loopItem.id) {
          iterationKey = String(loopItem.id);
        } else if (isObjectItem && loopItem.name) {
          iterationKey = String(loopItem.name);
        } else if (isObjectItem && loopItem.path) {
          // Paths are flattened so the key can be embedded in a file or set name.
          iterationKey = String(loopItem.path).replace(/[/\\?%*:|"<>]/g, '-');
        } else {
          iterationKey = String(index);
        }
        currentItemForContext = { key: iterationKey, value: loopItem };
      }

      logger.debug(`Preparing iteration ${index} with key: ${iterationKey}`, { baseSetName: this.baseSetName });

      const iterationContext = {
        ...this.initialContext,
        item: currentItemForContext,
        this: currentItemForContext, // Alias for 'item'
        ...(this.setDefinition.variables || {}) // Set-level variables are applied last
      };

      iterationPromises.push(runIteration(iterationKey, iterationContext, currentItemForContext));
    }

    logger.info(`Executing ${iterationPromises.length} iterations in parallel for set ${this.baseSetName}...`);
    const iterationResults = await Promise.all(iterationPromises);

    // Simple merge: for the same key, the later iteration wins.
    const aggregatedOutputs = {};
    for (const result of iterationResults) {
      if (result && result.outputs) {
        Object.assign(aggregatedOutputs, result.outputs);
      }
    }

    logger.info(`Finished all parallel iterations for set ${this.baseSetName}`);
    return { outputs: aggregatedOutputs };
  }

  /**
   * Executes the set logic for a specific iteration.
   *
   * The iteration key is registered on the output reference resolver for the
   * duration of the run and is always cleared afterwards, even when the run
   * fails.
   *
   * NOTE(review): the resolver is shared between all iterations started by
   * executeForEach, which run concurrently; one iteration can overwrite or
   * clear the key another iteration still relies on. Confirm against the
   * resolver implementation.
   *
   * @param {object} context - The base context (workflow level).
   * @param {object} item - The current item being iterated over (e.g., { key: 'item1', value: {...} } or just the value).
   * @param {string} iterationKey - The key identifying the current iteration.
   * @returns {Promise<{ outputs: object }>} Object containing the final outputs map for this iteration.
   */
  async executeIteration(context, item, iterationKey) {
    this.outputReferenceResolver.setIterationContext(iterationKey);
    try {
      const hasValue = item !== null && item !== undefined && item.value !== undefined;
      const iterationContext = {
        ...context,
        item, // The whole item
        this: hasValue ? item.value : item // Direct access to the value when there is one
      };
      return await this.execute(iterationContext, iterationKey);
    } finally {
      // Must run on failure too, otherwise a stale key stays on the shared resolver.
      this.outputReferenceResolver.clearIterationContext();
    }
  }

  /**
   * Core execution logic for the set, used by executeOnce and executeIteration.
   * Validates required inputs, runs all tasks, then validates required outputs.
   *
   * @param {Object} context - The context for this specific execution (initial or iteration).
   * @param {string|null} iterationKey - The iteration key if applicable.
   * @returns {Promise<{ outputs: object }>} Object containing the final outputs map.
   * @throws {Error} If a required input or output is missing, or a task fails.
   */
  async execute(context = {}, iterationKey = null) {
    const effectiveSetName = iterationKey ? `${this.setDefinition.name}[${iterationKey}]` : this.setDefinition.name;
    logger.debug(`Executing set logic: ${this.setDefinition.name}`, { setName: effectiveSetName });

    if (this.setDefinition.requiredInput) {
      this.validateRequiredInput(this.setDefinition.requiredInput, context, effectiveSetName);
    }

    const taskOutputs = await this.executeTasks(this.setDefinition.tasks || [], context, iterationKey, effectiveSetName);

    if (this.setDefinition.requiredOutput) {
      this.validateRequiredOutput(this.setDefinition.requiredOutput, taskOutputs, effectiveSetName);
    }

    logger.debug(`Completed set logic execution: ${this.setDefinition.name}`, { setName: effectiveSetName, outputKeys: Object.keys(taskOutputs) });
    return { outputs: taskOutputs };
  }

  /**
   * Execute a single task within the set's context.
   *
   * Steps: load the task template, resolve `taskRef.prior_outputs` into extra
   * render variables, render the template, convert it to LLM-XML, obtain the
   * response (mock / dry-run placeholder or a real LLM call), record the output
   * on the workflow executor and optionally save the prompt to disk.
   *
   * @param {string} taskName - Name of the task to execute.
   * @param {object} taskRef - Task reference from the set definition; `prior_outputs`
   *   (map of local name -> reference string) is read when present.
   * @param {Object} context - Task context (includes workflow/set/iteration variables).
   * @param {string|null} iterationKey - Optional iteration key.
   * @param {string} effectiveSetName - The actual set name (including iteration key if applicable).
   * @returns {Promise<any>} Task execution result (LLM output string).
   * @throws {Error} Any failure, with a `task` property naming the failing task.
   */
  async executeTask(taskName, taskRef, context, iterationKey, effectiveSetName) {
    // Assigning `.task` to a thrown primitive would itself throw in strict mode,
    // so non-object throwables are wrapped in an Error first.
    const asErrorObject = (thrown) => {
      const isObjectLike = thrown !== null && (typeof thrown === 'object' || typeof thrown === 'function');
      return isObjectLike ? thrown : new Error(String(thrown));
    };

    // User-facing progress line: START <workflow> : <set> : <task> [<iter>]
    const workflowPrefix = this.isMultiRun ? chalk.dim(` ${path.basename(path.dirname(this.workflowPath))} :`) : '';
    const setPrefix = chalk.dim(` ${this.baseSetName} :`);
    const iterStr = iterationKey ? chalk.dim(` [${iterationKey}]`) : '';
    console.log(chalk.blue.bold(`START`) + workflowPrefix + setPrefix + chalk.blue(` ${taskName}`) + iterStr);

    logger.debug(`Executing task: ${taskName}`, { setName: effectiveSetName });
    logger.debug(`SetExecutor.executeTask received context keys: ${Object.keys(context).join(', ')}`);
    try {
      logger.debug(`SetExecutor.executeTask received context object: ${JSON.stringify(context)}`);
    } catch (e) {
      // e.g. circular structures; diagnostics only, never fatal
      logger.error('Failed to stringify context in executeTask', { error: e.message });
    }

    try {
      const { task, content: taskTemplate } = await this.registry.loadTask(taskName);

      // Resolve prior outputs into variables that exist for this task's render only.
      const resolvedPriorOutputs = {};
      if (taskRef && taskRef.prior_outputs && typeof taskRef.prior_outputs === 'object') {
        logger.debug(`Resolving prior_outputs for task '${taskName}'`, { config: taskRef.prior_outputs });
        for (const [localName, reference] of Object.entries(taskRef.prior_outputs)) {
          try {
            // Strip one surrounding {{ }} pair; trim first so padded references work too.
            const referenceString = reference.trim().replace(/^\{\{/, '').replace(/\}\}$/, '').trim();
            let resolvedValue;
            if (this.dryRun && referenceString.includes('[this]')) {
              // Expected in dry run: skipped silently, undefined is passed to the renderer.
              logger.debug(`[DRY RUN] Skipping prior_outputs resolution for '${localName}' due to [this] reference.`);
            } else {
              resolvedValue = this.outputReferenceResolver.resolveReference(referenceString, null);
            }
            logger.debug(`[SE_RESOLVE] Resolved prior_output '${localName}' ('${referenceString}') to value: ${JSON.stringify(resolvedValue)} (Type: ${typeof resolvedValue})`);
            resolvedPriorOutputs[localName] = resolvedValue;
          } catch (error) {
            // Best effort: an unresolved reference is logged and rendered as undefined.
            const errorMsg = `Failed to resolve prior_output reference '${reference}' for local name '${localName}'`;
            if (!this.dryRun) {
              logger.error(errorMsg, { error: error.message });
            } else {
              const dryRunMessage = `${errorMsg}. Reason: ${error.message} (Task output expected but not found in dry run).`;
              logger.debug(`[DRY RUN] ${dryRunMessage}`);
              this.workflowExecutor.addDryRunIssue('error', dryRunMessage);
            }
            resolvedPriorOutputs[localName] = undefined;
          }
        }
        logger.debug(`Resolved prior outputs for '${taskName}'. Keys: ${Object.keys(resolvedPriorOutputs).join(', ')}`);
      }

      // Prior outputs take precedence over same-named context variables.
      const renderContext = { ...context, ...resolvedPriorOutputs };
      logger.debug(`Final render context keys for '${taskName}': ${Object.keys(renderContext).join(', ')}`);
      try {
        logger.debug(`Final render context object: ${JSON.stringify(renderContext)}`);
      } catch (e) {
        logger.error('Failed to stringify renderContext', { keys: Object.keys(renderContext) });
      }

      const renderedPrompt = await this.renderer.render(taskTemplate, renderContext, this.dryRun);

      // Convert the rendered Markdown prompt to LLM-XML format.
      let xmlPromptContent;
      try {
        xmlPromptContent = await convertMarkdownToXML(renderedPrompt);
        logger.debug('Successfully converted Markdown prompt to LLMXML', { taskName, setName: effectiveSetName });
      } catch (xmlError) {
        logger.error('Failed to convert Markdown prompt to LLMXML', {
          taskName,
          setName: effectiveSetName,
          error: xmlError.message,
          // String() so a non-string render result cannot mask the conversion error
          markdownPrompt: String(renderedPrompt).substring(0, 200) + '...'
        });
        throw new Error(`LLMXML conversion failed for task ${taskName}: ${xmlError.message}`);
      }

      let responseContent;
      if (this.mockTaskExecution) {
        logger.info(`MOCK EXECUTION: Skipping LLM call for task ${taskName}`, { setName: effectiveSetName });
        responseContent = `[Mock Output for task ${taskName} (Set: ${effectiveSetName})]`;
      } else if (this.dryRun) {
        logger.info(`DRY RUN: Skipping LLM call for task ${taskName}`, { setName: effectiveSetName });
        responseContent = `[DRY RUN] This would be the response from task ${taskName} in set ${effectiveSetName}.\nPrompt preview: ${xmlPromptContent.substring(0, 200)}...`;
      } else {
        const messages = [{ role: 'user', content: xmlPromptContent }];
        try {
          responseContent = await this.llmClient.completeMessages(messages, this.modelConfig);
          logger.debug(`LLM response received for task ${taskName}`, {
            setName: effectiveSetName,
            type: typeof responseContent,
            contentPreview: typeof responseContent === 'string' ? responseContent.substring(0, 100) + '...' : '[non-string]'
          });
        } catch (error) {
          const errorMessage = error instanceof Error ? error.message : String(error);
          logger.error(`LLM call failed for task ${taskName}: ${errorMessage}`, { setName: effectiveSetName, error: errorMessage });
          const failure = asErrorObject(error);
          failure.task = taskName;
          throw failure;
        }
      }

      // Record the output on the workflow executor.
      if (this.workflowExecutor && typeof this.workflowExecutor.recordTaskOutput === 'function') {
        this.workflowExecutor.recordTaskOutput(this.setIndex, this.baseSetName, taskName, iterationKey, responseContent);
      } else {
        logger.warn('WorkflowExecutor or recordTaskOutput method not available for recording output.', { setName: effectiveSetName, taskName });
      }

      // Optionally save the XML prompt under <workflow dir>/prompts/<set name>/.
      if (this.savePrompts) {
        try {
          // effectiveSetName includes the iteration key; sanitise it for use as a directory name.
          const safeSetName = effectiveSetName.replace(/[/\\?%*:|"<>]/g, '-');
          const promptFileName = `${taskName}.prompt.xml`;
          const outputDir = path.resolve(path.dirname(this.workflowPath), 'prompts', safeSetName);
          const finalPromptPath = path.join(outputDir, promptFileName);
          await fs.mkdir(outputDir, { recursive: true });
          await fs.writeFile(finalPromptPath, xmlPromptContent);
          logger.debug(`Saved task prompt`, { taskName, directory: outputDir, promptFile: promptFileName });
        } catch (saveError) {
          // A failed save must not fail the task.
          logger.error(`Failed to save prompt for task ${taskName}`, {
            setName: effectiveSetName, error: saveError.message
          });
        }
      }

      return responseContent;
    } catch (error) {
      const failure = asErrorObject(error);
      logger.error(`Failed task: ${taskName}`, { setName: effectiveSetName, error: failure.message });
      if (!failure.task) {
        failure.task = taskName;
      }
      throw failure;
    }
  }

  /**
   * Executes a list of tasks in parallel.
   *
   * All task references are validated before any task is started, so an
   * invalid reference cannot leave already-started tasks running unobserved.
   * Results are keyed by task name (`outputAs` is ignored); if the same task
   * name appears twice, the later entry wins.
   *
   * @param {Array<object>} tasks - Array of task references (e.g., { useTask: 'taskName', outputAs: 'varName' }).
   * @param {Object} context - The context to pass to each task.
   * @param {string|null} iterationKey - The current iteration key, if any.
   * @param {string} effectiveSetName - The set name (potentially including iteration key).
   * @returns {Promise<Object>} A map of task names to their results.
   * @throws {Error} If a task reference lacks `useTask`, or the first task failure.
   */
  async executeTasks(tasks, context, iterationKey, effectiveSetName) {
    if (!tasks || tasks.length === 0) {
      logger.debug('No tasks defined in set.', { setName: effectiveSetName });
      return {};
    }

    if (tasks.some((taskRef) => !taskRef || !taskRef.useTask)) {
      throw new Error('Task reference is missing `useTask` property');
    }

    const taskPromises = tasks.map(async (taskRef) => {
      const taskName = taskRef.useTask;
      try {
        // The full taskRef is passed so executeTask can read prior_outputs.
        const result = await this.executeTask(taskName, taskRef, context, iterationKey, effectiveSetName);
        logger.debug(`Mapping result for task '${taskName}' to output key '${taskName}'`, { setName: effectiveSetName });
        return { [taskName]: result };
      } catch (error) {
        logger.error(`Task execution promise failed: ${taskName}`, { setName: effectiveSetName, error: error.message });
        throw error; // Propagate to Promise.all
      }
    });

    try {
      const resultsArray = await Promise.all(taskPromises);
      // [{a: r1}, {b: r2}] -> {a: r1, b: r2}
      const combinedResults = Object.assign({}, ...resultsArray);
      logger.debug('All parallel tasks completed.', { setName: effectiveSetName, outputKeys: Object.keys(combinedResults) });
      return combinedResults;
    } catch (error) {
      logger.error('One or more tasks failed during parallel execution.', { setName: effectiveSetName, error: error.message });
      throw error; // First rejection seen by Promise.all
    }
  }

  /**
   * Validate required inputs are present in context, supporting dot notation.
   * A value counts as missing when `getNestedProperty` returns `undefined` for it.
   *
   * @param {Array<string>|string} required - Required input fields (can use dot notation).
   * @param {Object} context - Context to validate.
   * @param {string} source - Source of the requirement for error messages.
   * @throws {Error} Listing every missing input.
   */
  validateRequiredInput(required, context, source) {
    const requiredKeys = typeof required === 'string' ? [required] : required;
    const missing = requiredKeys.filter(key => getNestedProperty(context, key) === undefined);
    if (missing.length > 0) {
      throw new Error(`Set [${source}] missing required input variables: ${missing.join(', ')}`);
    }
  }

  /**
   * Validate required outputs are present in result.
   * Only the result's own keys count; inherited members such as `toString`
   * do not satisfy a requirement.
   *
   * @param {Array<string>|string} required - Required output fields
   * @param {Object} result - Result to validate
   * @param {string} source - Source of the requirement for error messages
   * @throws {Error} Listing every missing output.
   */
  validateRequiredOutput(required, result, source) {
    const requiredKeys = typeof required === 'string' ? [required] : required;
    const missing = requiredKeys.filter(key => !Object.prototype.hasOwnProperty.call(result, key));
    if (missing.length > 0) {
      throw new Error(`Set [${source}] missing required output variables: ${missing.join(', ')}`);
    }
  }
}