UNPKG

cmte

Version:

Design by Committee™ except it's just you and LLMs

491 lines (439 loc) 24.8 kB
/**
 * SetExecutor - Executes a set of tasks in parallel
 */

import path from 'path';
import { logger } from '../../utils/logger.js';
import fs from 'fs/promises';
import chalk from 'chalk';
import getClaudeClient from '../llm/claude-adapter.js';
import getLocalLLMClient from '../llm/local-llm-adapter.js';
import { OutputReferenceResolver } from '../task/output-reference-resolver.js';
import { FileCollectionManager } from '../file-collection-manager.js';
import { TemplateRenderer } from '../template-renderer.js';
import { BaseLLMClient } from '../llm/base-llm-client.js';
import { ComponentRegistry as Registry } from '../components/registry.js';
import { toXML as convertMarkdownToXML } from '../../utils/llmxml.js';
// Import WorkflowExecutor for type checking (optional but good practice)
import { WorkflowExecutor } from './workflow-executor.js';
import { getNestedProperty } from '../../utils/nested-property.js';

/**
 * Own-property test that also works for objects without a prototype
 * (where `obj.hasOwnProperty` does not exist).
 *
 * @param {object} target - Object to inspect.
 * @param {string} key - Property name.
 * @returns {boolean} True when `key` is an own property of `target`.
 */
function hasOwn(target, key) {
  return Object.prototype.hasOwnProperty.call(target, key);
}

/**
 * JSON-serialises a value for log output without ever throwing.
 *
 * @param {*} value - Value to serialise.
 * @returns {string|undefined} The JSON text (or `undefined`, as `JSON.stringify`
 *   returns for `undefined`), or a placeholder when serialisation fails.
 */
function safeStringify(value) {
  try {
    return JSON.stringify(value);
  } catch (error) {
    // Circular structures and BigInt values make JSON.stringify throw.
    return '[unserializable]';
  }
}

/**
 * Makes a thrown value safe to annotate with extra properties.
 * Objects (including Error instances) are returned untouched; primitives,
 * `null` and `undefined` are wrapped because assigning a property to them
 * throws in strict mode.
 *
 * @param {*} thrown - The value caught by a `catch` clause.
 * @returns {object} The original object, or an Error wrapping the primitive.
 */
function toError(thrown) {
  if (thrown !== null && (typeof thrown === 'object' || typeof thrown === 'function')) {
    return thrown;
  }
  const wrapped = new Error(String(thrown));
  wrapped.cause = thrown;
  return wrapped;
}

/**
 * Picks the iteration key for an array element that is not a `{ key, value }` pair.
 * Preference order for objects: `id`, `name`, sanitised `path`; everything else
 * (including primitives) falls back to the element's index.
 *
 * @param {*} item - The array element.
 * @param {number} index - Position of the element in the collection.
 * @returns {string} The iteration key.
 */
function deriveIterationKey(item, index) {
  if (typeof item === 'object' && item !== null) {
    // `id === 0` is a legitimate identifier, so do not rely on truthiness alone.
    if (item.id || item.id === 0) {
      return String(item.id);
    }
    if (item.name) {
      return String(item.name);
    }
    // Fallback for file iteration (where name might not be unique enough)
    if (item.path) {
      return String(item.path).replace(/[/\\?%*:|"<>]/g, '-');
    }
  }
  return String(index);
}

/**
 * Helper function to recursively resolve promises in an object.
 * Arrays and plain own-enumerable properties are walked recursively; a Promise
 * is awaited, but the value it yields is returned as-is (not walked again).
 *
 * @param {*} obj - Value that may contain promises.
 * @returns {Promise<*>} A copy of the structure with promises replaced by their values.
 */
async function resolvePromisesInObject(obj) {
  if (!obj || typeof obj !== 'object') {
    return obj;
  }
  if (obj instanceof Promise) {
    return await obj;
  }
  if (Array.isArray(obj)) {
    const resolvedArray = await Promise.all(obj.map(resolvePromisesInObject));
    return resolvedArray;
  }

  const resolvedObj = {};
  for (const key in obj) {
    if (hasOwn(obj, key)) {
      resolvedObj[key] = await resolvePromisesInObject(obj[key]);
    }
  }
  return resolvedObj;
}

/**
 * Simplified executor for set components
 */
export class SetExecutor {
  /**
   * @param {object} setDefinition - Set definition (`name`, `tasks`, `variables`,
   *   `requiredInput`, `requiredOutput` are read by this class).
   * @param {Registry} registry - Registry used to load task definitions.
   * @param {WorkflowExecutor} workflowExecutor - The parent workflow executor instance;
   *   shared components and run options are copied from it.
   * @param {object} [initialContext={}] - Initial context for the set.
   * @param {number} [setIndex=-1] - Index of this set, forwarded to `recordTaskOutput`.
   * @param {object} [sharedComponents={}] - Accepted for compatibility; not read here
   *   (shared components are taken from `workflowExecutor`).
   * @throws {Error} When `setDefinition`, `registry` or `workflowExecutor` is missing.
   */
  constructor(setDefinition, registry, workflowExecutor, initialContext = {}, setIndex = -1, sharedComponents = {}) {
    if (!setDefinition) throw new Error('Set definition is required');
    if (!registry) throw new Error('Registry is required');
    if (!workflowExecutor) throw new Error('WorkflowExecutor instance is required');

    this.setDefinition = setDefinition;
    this.registry = registry;
    this.workflowExecutor = workflowExecutor;
    this.initialContext = initialContext;
    this.setIndex = setIndex;

    // Shared components come from the workflow executor.
    this.fileCollectionManager = workflowExecutor.fileCollectionManager;
    this.outputReferenceResolver = workflowExecutor.outputReferenceResolver;

    // Create ONE renderer instance for the SetExecutor to use
    this.renderer = new TemplateRenderer(
      this.fileCollectionManager,
      this.outputReferenceResolver,
      this.workflowExecutor
    );

    // Run options mirrored from the workflow executor.
    this.savePrompts = workflowExecutor.savePrompts;
    this.dryRun = workflowExecutor.dryRun;
    this.apiDryRun = workflowExecutor.apiDryRun;
    this.lite = workflowExecutor.lite;
    this.useLocalLLM = workflowExecutor.useLocalLLM;
    this.llmClient = workflowExecutor.llmClient;
    this.mockTaskExecution = workflowExecutor.mockTaskExecution;
    this.modelConfig = workflowExecutor.modelConfig;
    this.workflowPath = workflowExecutor.workflowPath;
    this.outputPath = workflowExecutor.rawOutputPath;
    this.isMultiRun = workflowExecutor.isMultiRun;

    // Display name; iterations append their key to it.
    this.baseSetName = setDefinition.name || 'UnnamedSet';

    logger.debug('SetExecutor initialized', { baseSetName: this.baseSetName, dryRun: this.dryRun });
  }

  /**
   * Executes the set once without iteration.
   * @param {Object} initialContext - The initial context for this specific run.
   * @returns {Promise<{ outputs: object }>} Object containing the final outputs map.
   */
  async executeOnce(initialContext = {}) {
    logger.debug(`SetExecutor.executeOnce starting for set: ${this.setDefinition.name} with initial context keys: ${Object.keys(initialContext).join(', ')}`);
    return this.execute(initialContext, null);
  }

  /**
   * Executes the set for each item in a collection in parallel.
   *
   * Outputs are merged with a shallow "last write wins" merge. Because each
   * iteration's outputs are keyed by task name, iterations overwrite one another
   * in the returned map; per-iteration results are still reported individually
   * through `workflowExecutor.recordTaskOutput`.
   *
   * @param {Array<object>} itemsToIterate - Array of items (e.g., {key, value} pairs or file objects).
   * @returns {Promise<{ outputs: object }>} Object containing the final outputs map aggregated across iterations.
   */
  async executeForEach(itemsToIterate) {
    const iterationPromises = [];

    for (const [index, loopItem] of itemsToIterate.entries()) {
      let iterationKey;
      let currentItemForContext;

      const isKeyValuePair = loopItem
        && typeof loopItem === 'object'
        && hasOwn(loopItem, 'key')
        && hasOwn(loopItem, 'value');

      if (isKeyValuePair) {
        // { key, value } structure produced by object iteration.
        iterationKey = String(loopItem.key);
        currentItemForContext = { key: loopItem.key, value: loopItem.value };
      } else {
        // Array iteration: wrap the element so templates always see item.key / item.value.
        iterationKey = deriveIterationKey(loopItem, index);
        currentItemForContext = { key: iterationKey, value: loopItem };
      }

      logger.debug(`Preparing iteration ${index} with key: ${iterationKey}`, { baseSetName: this.baseSetName });

      const iterationContext = {
        ...this.initialContext,
        item: currentItemForContext,
        this: currentItemForContext, // Alias for 'item'
        ...(this.setDefinition.variables || {}) // Add any set-level variables
      };

      // `const`/`let` bindings are per-iteration, so the closure below captures
      // this iteration's values without needing an argument-passing IIFE.
      const runIteration = async () => {
        logger.debug(`Starting execution promise for iteration key: ${iterationKey}`, { baseSetName: this.baseSetName });
        const result = await this.executeIteration(iterationContext, currentItemForContext, iterationKey);
        logger.debug(`Finished execution promise for iteration key: ${iterationKey}`, { baseSetName: this.baseSetName });
        return result;
      };
      iterationPromises.push(runIteration());
    }

    logger.info(`Executing ${iterationPromises.length} iterations in parallel for set ${this.baseSetName}...`);
    const iterationResults = await Promise.all(iterationPromises);

    const aggregatedOutputs = {};
    for (const result of iterationResults) {
      if (result && result.outputs) {
        Object.assign(aggregatedOutputs, result.outputs);
      }
    }

    logger.info(`Finished all parallel iterations for set ${this.baseSetName}`);
    return { outputs: aggregatedOutputs };
  }

  /**
   * Executes the set logic for a specific iteration.
   *
   * NOTE(review): the iteration context lives on the resolver shared by all
   * iterations, and `executeForEach` runs iterations concurrently, so the value
   * set here can be replaced or cleared by a sibling iteration while this one is
   * awaiting. `executeTask` re-asserts the key before resolving references.
   *
   * @param {object} context - The base context (workflow level).
   * @param {object} item - The current item being iterated over (e.g., { key: 'item1', value: {...} } or just the value).
   * @param {string} iterationKey - The key identifying the current iteration.
   * @returns {Promise<{ outputs: object }>} Object containing the final outputs map for this iteration.
   */
  async executeIteration(context, item, iterationKey) {
    this.outputReferenceResolver.setIterationContext(iterationKey);
    try {
      const iterationContext = {
        ...context,
        item, // Make the whole item available
        this: item.value !== undefined ? item.value : item // Direct access via 'this' if item is simple
      };
      return await this.execute(iterationContext, iterationKey);
    } finally {
      // Always clear, including when execution throws, so a failed iteration
      // does not leave its key behind on the shared resolver.
      this.outputReferenceResolver.clearIterationContext();
    }
  }

  /**
   * Core execution logic for the set, used by executeOnce and executeIteration.
   * Validates required inputs, runs the set's tasks, then validates required outputs.
   *
   * @param {Object} context - The context for this specific execution (initial or iteration).
   * @param {string|null} iterationKey - The iteration key if applicable.
   * @returns {Promise<{ outputs: object }>} Object containing the final outputs map.
   * @throws {Error} When a required input or output is missing, or a task fails.
   */
  async execute(context = {}, iterationKey = null) {
    // Fall back to the same default the constructor uses so unnamed sets do not
    // produce `undefined` in logs, error messages and prompt paths.
    const setName = this.setDefinition.name || this.baseSetName;
    const effectiveSetName = iterationKey ? `${setName}[${iterationKey}]` : setName;
    logger.debug(`Executing set logic: ${this.setDefinition.name}`, { setName: effectiveSetName });

    if (this.setDefinition.requiredInput) {
      this.validateRequiredInput(this.setDefinition.requiredInput, context, effectiveSetName);
    }

    const taskOutputs = await this.executeTasks(this.setDefinition.tasks || [], context, iterationKey, effectiveSetName);

    if (this.setDefinition.requiredOutput) {
      this.validateRequiredOutput(this.setDefinition.requiredOutput, taskOutputs, effectiveSetName);
    }

    logger.debug(`Completed set logic execution: ${this.setDefinition.name}`, { setName: effectiveSetName, outputKeys: Object.keys(taskOutputs) });
    return { outputs: taskOutputs };
  }

  /**
   * Execute a single task within the set's context.
   *
   * Steps: load the task template, resolve `taskRef.prior_outputs`, render the
   * template, convert it to LLM-XML, obtain a response (mock, dry-run placeholder
   * or real LLM call), record the output on the workflow executor and optionally
   * save the prompt to disk.
   *
   * @param {string} taskName - Name of the task to execute.
   * @param {object} taskRef - Task reference from the set definition; `prior_outputs`
   *   (a map of local name to reference string) is read from it.
   * @param {Object} context - Task context (includes workflow/set/iteration variables).
   * @param {string|null} iterationKey - Optional iteration key.
   * @param {string} effectiveSetName - The actual set name (including iteration key if applicable).
   * @returns {Promise<any>} Task execution result (LLM output string).
   * @throws {Error} Any failure is re-thrown with a `task` property naming the task.
   */
  async executeTask(taskName, taskRef, context, iterationKey, effectiveSetName) {
    // User-facing log. Format: START <workflow> : <set> : <task> [<iter>]
    const workflowPrefix = this.isMultiRun ? chalk.dim(` ${path.basename(path.dirname(this.workflowPath))} :`) : '';
    const setPrefix = chalk.dim(` ${this.baseSetName} :`);
    const iterStr = iterationKey ? chalk.dim(` [${iterationKey}]`) : '';
    console.log(chalk.blue.bold(`START`) + workflowPrefix + setPrefix + chalk.blue(` ${taskName}`) + iterStr);

    logger.debug(`Executing task: ${taskName}`, { setName: effectiveSetName });
    logger.debug(`SetExecutor.executeTask received context keys: ${Object.keys(context).join(', ')}`);
    try {
      logger.debug(`SetExecutor.executeTask received context object: ${JSON.stringify(context)}`);
    } catch (e) {
      logger.error('Failed to stringify context in executeTask');
    }

    let responseContent;

    try {
      const { content: taskTemplate } = await this.registry.loadTask(taskName);

      // Resolved prior outputs are merged into a context used for this task only.
      const resolvedPriorOutputs = {};
      if (taskRef.prior_outputs && typeof taskRef.prior_outputs === 'object') {
        logger.debug(`Resolving prior_outputs for task '${taskName}'`, { config: taskRef.prior_outputs });

        // Sibling iterations run concurrently against the same resolver, and we
        // have awaited since executeIteration set the key. The loop below is
        // synchronous, so re-asserting the key here guarantees references are
        // resolved against this task's own iteration.
        if (iterationKey !== null && iterationKey !== undefined) {
          this.outputReferenceResolver.setIterationContext(iterationKey);
        }

        for (const [localName, reference] of Object.entries(taskRef.prior_outputs)) {
          let resolvedValue;
          try {
            // Strip one leading `{{` and one trailing `}}` (anchored, so braces
            // inside the reference are left alone).
            const referenceString = reference.trim().replace(/^\{\{/, '').replace(/\}\}$/, '').trim();

            if (this.dryRun && referenceString.includes('[this]')) {
              // Expected in dry run: skip silently and pass undefined to the renderer.
              logger.debug(`[DRY RUN] Skipping prior_outputs resolution for '${localName}' due to [this] reference.`);
              resolvedValue = undefined;
            } else {
              resolvedValue = this.outputReferenceResolver.resolveReference(referenceString, null);
            }

            // Store first; logging uses a non-throwing serialiser so an
            // unserialisable value is never mistaken for a resolution failure.
            resolvedPriorOutputs[localName] = resolvedValue;
            logger.debug(`[SE_RESOLVE] Resolved prior_output '${localName}' ('${referenceString}') to value: ${safeStringify(resolvedValue)} (Type: ${typeof resolvedValue})`);
          } catch (error) {
            const reason = error && error.message;
            const errorMsg = `Failed to resolve prior_output reference '${reference}' for local name '${localName}'`;
            if (!this.dryRun) {
              logger.error(errorMsg, { error: reason });
            } else {
              // Debug-level internally; surfaced as an ERROR issue in the dry-run summary.
              const dryRunMessage = `${errorMsg}. Reason: ${reason} (Task output expected but not found in dry run).`;
              logger.debug(`[DRY RUN] ${dryRunMessage}`);
              this.workflowExecutor.addDryRunIssue('error', dryRunMessage);
            }
            resolvedPriorOutputs[localName] = undefined;
          }
        }
        logger.debug(`Resolved prior outputs for '${taskName}'. Keys: ${Object.keys(resolvedPriorOutputs).join(', ')}`);
      }

      // Prior outputs override context entries with the same name.
      const renderContext = { ...context, ...resolvedPriorOutputs };
      logger.debug(`Final render context keys for '${taskName}': ${Object.keys(renderContext).join(', ')}`);
      try {
        logger.debug(`Final render context object: ${JSON.stringify(renderContext)}`);
      } catch (e) {
        logger.error('Failed to stringify renderContext', { keys: Object.keys(renderContext) });
      }

      const renderedPrompt = await this.renderer.render(taskTemplate, renderContext, this.dryRun);

      // Convert the rendered Markdown prompt to LLM-XML format
      let xmlPromptContent;
      try {
        xmlPromptContent = await convertMarkdownToXML(renderedPrompt);
        logger.debug('Successfully converted Markdown prompt to LLMXML', { taskName, setName: effectiveSetName });
      } catch (xmlError) {
        logger.error('Failed to convert Markdown prompt to LLMXML', {
          taskName,
          setName: effectiveSetName,
          error: xmlError.message,
          // String() so a non-string prompt cannot mask the conversion error.
          markdownPrompt: String(renderedPrompt).substring(0, 200) + '...'
        });
        throw new Error(`LLMXML conversion failed for task ${taskName}: ${xmlError.message}`);
      }

      if (this.mockTaskExecution) {
        logger.info(`MOCK EXECUTION: Skipping LLM call for task ${taskName}`, { setName: effectiveSetName });
        responseContent = `[Mock Output for task ${taskName} (Set: ${effectiveSetName})]`;
      } else if (this.dryRun) {
        logger.info(`DRY RUN: Skipping LLM call for task ${taskName}`, { setName: effectiveSetName });
        responseContent = `[DRY RUN] This would be the response from task ${taskName} in set ${effectiveSetName}.\nPrompt preview: ${xmlPromptContent.substring(0, 200)}...`;
      } else {
        const messages = [{ role: 'user', content: xmlPromptContent }];
        try {
          responseContent = await this.llmClient.completeMessages(messages, this.modelConfig);
          logger.debug(`LLM response received for task ${taskName}`, {
            setName: effectiveSetName,
            type: typeof responseContent,
            contentPreview: typeof responseContent === 'string' ? responseContent.substring(0, 100) + '...' : '[non-string]'
          });
        } catch (llmError) {
          const errorMessage = llmError instanceof Error ? llmError.message : String(llmError);
          logger.error(`LLM call failed for task ${taskName}: ${errorMessage}`, { setName: effectiveSetName, error: errorMessage });
          const failure = toError(llmError);
          failure.task = taskName;
          throw failure;
        }
      }

      if (this.workflowExecutor && typeof this.workflowExecutor.recordTaskOutput === 'function') {
        this.workflowExecutor.recordTaskOutput(this.setIndex, this.baseSetName, taskName, iterationKey, responseContent);
      } else {
        logger.warn('WorkflowExecutor or recordTaskOutput method not available for recording output.', { setName: effectiveSetName, taskName });
      }

      if (this.savePrompts) {
        try {
          // effectiveSetName includes the iteration key; sanitise it for use as a directory name.
          const safeSetName = effectiveSetName.replace(/[/\\?%*:|\"<>]/g, '-');
          const promptFileName = `${taskName}.prompt.xml`;
          // Prompts go to a top-level 'prompts' folder next to the workflow file.
          const outputDir = path.resolve(path.dirname(this.workflowPath), 'prompts', safeSetName);
          const finalPromptPath = path.join(outputDir, promptFileName);

          // Create the file's own parent so task names containing a path
          // separator do not fail on a missing sub-directory.
          await fs.mkdir(path.dirname(finalPromptPath), { recursive: true });
          await fs.writeFile(finalPromptPath, xmlPromptContent);
          logger.debug(`Saved task prompt`, { taskName, directory: outputDir, promptFile: promptFileName });
        } catch (saveError) {
          // Best effort: a save failure must not fail the task.
          logger.error(`Failed to save prompt for task ${taskName}`, { setName: effectiveSetName, error: saveError.message });
        }
      }

      return responseContent;
    } catch (thrown) {
      const error = toError(thrown);
      logger.error(`Failed task: ${taskName}`, { setName: effectiveSetName, error: error.message });
      if (!error.task) {
        error.task = taskName;
      }
      throw error;
    }
  }

  /**
   * Executes a list of tasks in parallel.
   *
   * Every task reference is validated before any task is started, so a malformed
   * reference cannot leave already-started tasks running unobserved. The output
   * key is always the task name; `outputAs` is ignored.
   *
   * @param {Array<object>} tasks - Array of task references (e.g., { useTask: 'taskName', outputAs: 'varName' }).
   * @param {Object} context - The context to pass to each task.
   * @param {string|null} iterationKey - The current iteration key, if any.
   * @param {string} effectiveSetName - The set name (potentially including iteration key).
   * @returns {Promise<Object>} A map of task names to their results.
   * @throws {Error} When a reference lacks `useTask`, or with the first task failure.
   */
  async executeTasks(tasks, context, iterationKey, effectiveSetName) {
    if (!tasks || tasks.length === 0) {
      logger.debug('No tasks defined in set.', { setName: effectiveSetName });
      return {};
    }

    for (const taskRef of tasks) {
      if (!taskRef || !taskRef.useTask) {
        throw new Error('Task reference is missing `useTask` property');
      }
    }

    const taskPromises = tasks.map(async (taskRef) => {
      const taskName = taskRef.useTask;
      try {
        // The full taskRef is passed so executeTask can read prior_outputs.
        const result = await this.executeTask(taskName, taskRef, context, iterationKey, effectiveSetName);
        logger.debug(`Mapping result for task '${taskName}' to output key '${taskName}'`, { setName: effectiveSetName });
        return { [taskName]: result };
      } catch (error) {
        logger.error(`Task execution promise failed: ${taskName}`, { setName: effectiveSetName, error: error.message });
        throw error;
      }
    });

    try {
      const resultsArray = await Promise.all(taskPromises);
      // [{taskName1: res1}, {taskName2: res2}] -> {taskName1: res1, taskName2: res2}
      const combinedResults = Object.assign({}, ...resultsArray);
      logger.debug('All parallel tasks completed.', { setName: effectiveSetName, outputKeys: Object.keys(combinedResults) });
      return combinedResults;
    } catch (error) {
      logger.error('One or more tasks failed during parallel execution.', { setName: effectiveSetName, error: error.message });
      // Re-throw the first error encountered by Promise.all
      throw error;
    }
  }

  /**
   * Validate required inputs are present in context, supporting dot notation.
   * @param {Array<string>} required - Required input fields (can use dot notation).
   * @param {Object} context - Context to validate.
   * @param {string} source - Source of the requirement for error messages.
   * @throws {Error} Listing every required key that resolves to `undefined`.
   */
  validateRequiredInput(required, context, source) {
    const missing = required.filter((key) => getNestedProperty(context, key) === undefined);
    if (missing.length > 0) {
      throw new Error(`Set [${source}] missing required input variables: ${missing.join(', ')}`);
    }
  }

  /**
   * Validate required outputs are present in result.
   * Only own properties count, so inherited names such as `toString` or
   * `constructor` are not mistaken for task outputs.
   *
   * @param {Array<string>} required - Required output fields
   * @param {Object} result - Result to validate
   * @param {string} source - Source of the requirement for error messages
   * @throws {Error} Listing every required key absent from `result`.
   */
  validateRequiredOutput(required, result, source) {
    const missing = required.filter((key) => !hasOwn(result, key));
    if (missing.length > 0) {
      throw new Error(`Set [${source}] missing required output variables: ${missing.join(', ')}`);
    }
  }
}