UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio

642 lines (641 loc) 26.4 kB
/**
 * Workflow Runner - Main Orchestrator
 * ===================================
 *
 * Coordinates the complete workflow execution pipeline:
 * 1. Model execution (layer-based or flat)
 * 2. Judge scoring with hierarchical prompts
 * 3. Response conditioning (stub)
 * 4. Metrics collection
 * 5. Result assembly
 *
 * @module workflow/core/workflowRunner
 */
import { SpanStatusCode } from "@opentelemetry/api";
import { logger } from "../../utils/logger.js";
import { SpanSerializer, SpanType, SpanStatus, getMetricsAggregator, } from "../../observability/index.js";
import { withSpan } from "../../telemetry/withSpan.js";
import { tracers } from "../../telemetry/tracers.js";
import { getModelGroups, PLACEHOLDER_MODEL, PLACEHOLDER_PROVIDER, usesModelGroups, } from "../config.js";
import { validateWorkflow } from "../utils/workflowValidation.js";
import { executeEnsemble, executeModelGroups } from "./ensembleExecutor.js";
import { scoreEnsemble } from "./judgeScorer.js";
import { conditionResponse } from "./responseConditioner.js";
/**
 * Execute a complete workflow
 *
 * This is the main entry point that orchestrates:
 * - Model execution (respects modelGroups or flat models)
 * - Judge scoring (with hierarchical prompt resolution)
 * - Response conditioning (currently stub)
 * - Metrics calculation
 * - Result assembly
 *
 * Failures raised by the execution steps are caught and converted into a
 * failure-shaped result (empty content, score 0) instead of being rethrown.
 * An invalid configuration, however, is thrown from inside the span callback.
 *
 * @param config - Validated workflow configuration
 * @param options - Execution options including prompt
 * @returns Complete workflow result with scores and metrics
 * @throws {Error} When `validateWorkflow(config)` reports the config as invalid
 *
 * @example
 * ```typescript
 * const result = await runWorkflow(config, {
 *   prompt: 'Explain quantum entanglement',
 *   timeout: 30000,
 *   verbose: true,
 * });
 *
 * console.log('Best response:', result.content);
 * console.log('Score:', result.score);
 * ```
 */
export async function runWorkflow(config, options) {
    return withSpan({
        name: "neurolink.workflow.run",
        tracer: tracers.sdk,
        attributes: {
            "workflow.name": config.name,
            "workflow.type": config.type,
            "workflow.id": config.id ?? "unknown",
        },
    }, async (otelSpan) => {
        const startTime = Date.now();
        const span = SpanSerializer.createSpan(SpanType.WORKFLOW, "workflow.run", {
            "workflow.operation": "run",
            "workflow.name": config.name,
            "workflow.type": config.type,
            "workflow.id": config.id,
        });
        // Validate configuration
        const validation = validateWorkflow(config);
        if (!validation.valid) {
            // Build the message once so the recorded span and the thrown
            // error can never drift apart.
            const errMsg = `Invalid workflow configuration: ${validation.errors.map((err) => err.message).join(", ")}`;
            span.durationMs = Date.now() - startTime;
            const endedSpan = SpanSerializer.endSpan(span, SpanStatus.ERROR, errMsg);
            getMetricsAggregator().recordSpan(endedSpan);
            throw new Error(errMsg);
        }
        if (options.verbose) {
            logger.debug(`[WorkflowRunner] Starting workflow: ${config.name}`);
            logger.debug(`[WorkflowRunner] Type: ${config.type}`);
            logger.debug(`[WorkflowRunner] Uses layer-based execution: ${usesModelGroups(config)}`);
        }
        try {
            // Step 1: Execute models (layer-based or flat)
            const ensembleResult = await executeModels(config, options);
            if (options.verbose) {
                logger.debug(`[WorkflowRunner] Received ${ensembleResult.responses.length} model responses`);
                logger.debug(`[WorkflowRunner] Successful: ${ensembleResult.successCount}`);
            }
            // Step 2: Score responses with judge(s)
            const scoreResult = await scoreResponses(config, ensembleResult.responses, options);
            if (options.verbose) {
                logger.debug(`[WorkflowRunner] Scoring complete`);
                logger.debug(`[WorkflowRunner] Scores:`, scoreResult.scores);
            }
            // Step 3: Select best response
            const bestResponse = selectBestResponse(ensembleResult.responses, scoreResult.scores);
            if (options.verbose) {
                logger.debug(`[WorkflowRunner] Best response: ${bestResponse.model}`);
                const bestScore = extractScore(scoreResult.scores, bestResponse, ensembleResult.responses);
                logger.debug(`[WorkflowRunner] Best score: ${bestScore}`);
            }
            // CRITICAL: Store original content BEFORE any processing
            const originalContent = bestResponse.content;
            // Step 4: Get processed content
            // Priority: Judge-synthesized > Separate conditioning > Original
            let processedContent;
            let conditioningTime = 0;
            const judgeScores = isJudgeScores(scoreResult.scores)
                ? scoreResult.scores
                : convertToJudgeScores(scoreResult.scores);
            if (judgeScores.synthesizedResponse) {
                // Judge already synthesized improved response
                processedContent = judgeScores.synthesizedResponse;
                logger.debug(`[WorkflowRunner] Using judge-synthesized response`);
            }
            else if (config.conditioning) {
                // Fall back to separate conditioning if configured
                const conditionedContent = await conditionFinalResponse(bestResponse, scoreResult.scores, config, options, ensembleResult.responses);
                processedContent = conditionedContent.content;
                conditioningTime = conditionedContent.conditioningTime;
                logger.debug(`[WorkflowRunner] Using separate conditioning`);
            }
            else {
                // No processing, use original
                processedContent = originalContent;
                logger.debug(`[WorkflowRunner] No conditioning applied`);
            }
            // Step 5: Calculate execution metrics
            const executionTime = Date.now() - startTime;
            const ensembleTime = ensembleResult.totalTime;
            const judgeTime = scoreResult.judgeTime;
            // Step 6: Assemble complete result
            const result = {
                // Primary output (processed version)
                content: processedContent,
                // IMPORTANT: Store original unmodified response separately
                originalContent: originalContent,
                // Evaluation metrics (0-100 scale)
                score: extractScore(scoreResult.scores, bestResponse, ensembleResult.responses),
                reasoning: extractReasoning(scoreResult.scores),
                // Ensemble data
                ensembleResponses: ensembleResult.responses,
                // Judge data
                judgeScores: judgeScores,
                selectedResponse: bestResponse,
                // Quality metrics
                confidence: extractConfidence(scoreResult.scores),
                consensus: extractConsensus(scoreResult.scores),
                // Performance metrics
                totalTime: executionTime,
                ensembleTime,
                judgeTime,
                conditioningTime: conditioningTime,
                // Workflow metadata
                workflow: config.id,
                workflowName: config.name,
                workflowVersion: config.version,
                // Resource usage
                usage: {
                    totalInputTokens: calculateInputTokens(ensembleResult.responses),
                    totalOutputTokens: calculateOutputTokens(ensembleResult.responses),
                    totalTokens: calculateTotalTokens(ensembleResult.responses),
                    byModel: [], // TODO: Populate per-model breakdown
                },
                // Additional metadata
                metadata: options.metadata,
                timestamp: new Date().toISOString(),
            };
            if (options.verbose) {
                logger.debug(`[WorkflowRunner] Workflow complete in ${executionTime}ms`);
                logger.debug(`[WorkflowRunner] Total tokens: ${result.usage?.totalTokens || 0}`);
            }
            span.durationMs = executionTime;
            const endedSpan = SpanSerializer.endSpan(span, SpanStatus.OK);
            getMetricsAggregator().recordSpan(endedSpan);
            return result;
        }
        catch (error) {
            const executionTime = Date.now() - startTime;
            const errorMessage = error instanceof Error ? error.message : String(error);
            if (options.verbose) {
                logger.error(`[WorkflowRunner] Workflow failed:`, errorMessage);
            }
            span.durationMs = executionTime;
            const endedSpan = SpanSerializer.endSpan(span, SpanStatus.ERROR, errorMessage);
            getMetricsAggregator().recordSpan(endedSpan);
            // Mark outer OTel span as ERROR since we return instead of rethrowing
            otelSpan.recordException(error instanceof Error ? error : new Error(errorMessage));
            otelSpan.setStatus({
                code: SpanStatusCode.ERROR,
                message: errorMessage,
            });
            // Return error result with dummy data
            const dummyResponse = {
                provider: PLACEHOLDER_PROVIDER,
                model: PLACEHOLDER_MODEL,
                content: "",
                responseTime: 0,
                status: "failure",
                error: errorMessage,
                timestamp: new Date().toISOString(),
            };
            return {
                content: "",
                score: 0,
                reasoning: `Workflow execution failed: ${errorMessage}`,
                ensembleResponses: [dummyResponse],
                confidence: 0,
                totalTime: executionTime,
                ensembleTime: 0,
                workflow: config.id,
                workflowName: config.name,
                workflowVersion: config.version,
                metadata: options.metadata,
                timestamp: new Date().toISOString(),
            };
        }
    }); // end withSpan
}
/**
 * Execute models using layer-based or flat execution
 *
 * Per-call `options.timeout` / `options.parallelism` take precedence over the
 * values in `config.execution` (falsy option values fall through to config).
 *
 * @param config - Workflow configuration (reads `execution`, `models`, `defaultSystemPrompt`)
 * @param options - Execution options (reads `prompt`, `timeout`, `parallelism`, `verbose`)
 * @returns Whatever `executeModelGroups` / `executeEnsemble` returns
 */
async function executeModels(config, options) {
    const executionConfig = {
        timeout: options.timeout || config.execution?.timeout,
        parallelism: options.parallelism || config.execution?.parallelism,
        modelTimeout: config.execution?.modelTimeout,
        minResponses: config.execution?.minResponses,
    };
    // Use layer-based execution if modelGroups defined
    if (usesModelGroups(config)) {
        const modelGroups = getModelGroups(config);
        if (options.verbose) {
            logger.debug(`[WorkflowRunner] Using layer-based execution`);
            logger.debug(`[WorkflowRunner] Groups: ${modelGroups.length}`);
        }
        return executeModelGroups(modelGroups, options.prompt, executionConfig, undefined, // systemPrompt override
        config.defaultSystemPrompt);
    }
    // Use flat parallel execution (backward compatible)
    if (options.verbose) {
        logger.debug(`[WorkflowRunner] Using flat parallel execution`);
        logger.debug(`[WorkflowRunner] Models: ${config.models.length}`);
    }
    return executeEnsemble({
        prompt: options.prompt,
        models: config.models,
        executionConfig,
        workflowDefaults: {
            systemPrompt: config.defaultSystemPrompt,
        },
    });
}
/**
 * Score responses using judge(s)
 *
 * Uses `config.judges` when it is a non-empty array, otherwise the single
 * `config.judge`. With no judge at all, every response gets a neutral score
 * of 50 keyed by `<provider>-<model>` and no judge call is made.
 *
 * @param config - Workflow configuration (reads `judges`, `judge`, `execution`, `defaultJudgePrompt`)
 * @param responses - Model responses to score
 * @param options - Execution options (reads `prompt`, `verbose`)
 * @returns Object with `scores` and `judgeTime` (neutral path), or the result of `scoreEnsemble`
 */
async function scoreResponses(config, responses, options) {
    // Use judges array if defined, otherwise use single judge
    const judges = config.judges && config.judges.length > 0
        ? config.judges
        : config.judge
            ? [config.judge]
            : [];
    if (judges.length === 0) {
        // No judges configured - return neutral scores
        if (options.verbose) {
            logger.debug(`[WorkflowRunner] No judges configured, using neutral scores`);
        }
        const neutralScores = {
            judgeProvider: PLACEHOLDER_PROVIDER,
            judgeModel: PLACEHOLDER_MODEL,
            scores: {},
            criteria: [],
            reasoning: "No judge configured",
            judgeTime: 0,
            timestamp: new Date().toISOString(),
        };
        // Assign neutral score (50) to each response
        for (const response of responses) {
            const responseId = `${response.provider}-${response.model}`;
            neutralScores.scores[responseId] = 50;
        }
        return {
            scores: neutralScores,
            judgeTime: 0,
        };
    }
    if (options.verbose) {
        logger.debug(`[WorkflowRunner] Using ${judges.length} judge(s)`);
    }
    return scoreEnsemble({
        judges,
        responses,
        originalPrompt: options.prompt,
        timeout: config.execution?.timeout,
        workflowDefaults: {
            judgePrompt: config.defaultJudgePrompt,
        },
    });
}
/**
 * Select best response based on scores
 *
 * Only responses with `status === "success"` compete. Ties keep the earliest
 * response because the comparison is strictly greater-than.
 *
 * @param responses - All model responses (successful or not)
 * @param scores - Judge scores object exposing a `scores` map
 * @returns The highest-scoring successful response; if none succeeded, the
 *          first response, or a placeholder failure response when the list is empty
 */
function selectBestResponse(responses, scores) {
    // Filter to successful responses only
    const successful = responses.filter((r) => r.status === "success");
    if (successful.length === 0) {
        // No successful responses - return first response
        return (responses[0] || {
            provider: PLACEHOLDER_PROVIDER,
            model: PLACEHOLDER_MODEL,
            content: "",
            responseTime: 0,
            status: "failure",
            error: "No responses received",
            timestamp: new Date().toISOString(),
        });
    }
    // Find response with highest score
    let bestResponse = successful[0];
    let bestScore = getResponseScore(bestResponse, scores, responses);
    for (const response of successful) {
        const score = getResponseScore(response, scores, responses);
        if (score > bestScore) {
            bestScore = score;
            bestResponse = response;
        }
    }
    return bestResponse;
}
/**
 * Get score for a specific response
 *
 * Lookup order:
 * 1. `response-<i>`, where `i` is the response's position among the
 *    successful responses only
 * 2. `<provider>-<model>`
 * 3. `0` when neither key yields a truthy score
 *
 * @param response - The response to look up (matched by identity)
 * @param scores - Judge scores object exposing a `scores` map
 * @param responses - All model responses, used to derive the successful-only index
 * @returns The score found, or 0
 */
function getResponseScore(response, scores, responses) {
    // Judge scores only successful responses, so we need to find the index
    // in the filtered successful responses array, not the original array
    const successfulResponses = responses.filter((r) => r.status === "success");
    const successfulIndex = successfulResponses.indexOf(response);
    logger.debug(`[WorkflowRunner] getResponseScore`, {
        responseProvider: response.provider,
        responseModel: response.model,
        responseStatus: response.status,
        originalIndex: responses.indexOf(response),
        successfulIndex,
        totalResponses: responses.length,
        successfulCount: successfulResponses.length,
        lookupKey: `response-${successfulIndex}`,
        availableKeys: Object.keys(scores.scores),
    });
    if (successfulIndex >= 0) {
        const indexKey = `response-${successfulIndex}`;
        if (indexKey in scores.scores) {
            const score = scores.scores[indexKey];
            logger.debug(`[WorkflowRunner] Found score ${score} for ${indexKey}`);
            return score;
        }
    }
    // Fallback to provider-model format
    const responseId = `${response.provider}-${response.model}`;
    logger.debug(`[WorkflowRunner] No index-based score found, trying provider-model: ${responseId}`);
    return scores.scores[responseId] || 0;
}
/**
 * Condition the final response - synthesize improved response using judge feedback
 *
 * @param response - The selected (best) response
 * @param scores - Judge scores passed through to the conditioner
 * @param config - Workflow configuration (reads `conditioning`)
 * @param options - Execution options (reads `prompt`, `verbose`)
 * @param allResponses - Every model response, passed through to the conditioner
 * @returns The result of `conditionResponse` when `config.conditioning` is set;
 *          otherwise the original content with `conditioningTime: 0` and
 *          metadata marking that no conditioning was applied
 */
async function conditionFinalResponse(response, scores, config, options, allResponses) {
    // Use conditioner if configured
    if (config.conditioning) {
        if (options.verbose) {
            logger.debug(`[WorkflowRunner] Applying response conditioner with synthesis`);
        }
        return conditionResponse({
            content: response.content,
            selectedResponse: response,
            allResponses,
            judgeScores: scores,
            config: config.conditioning,
            originalPrompt: options.prompt,
        });
    }
    // No conditioning - return original with metadata
    return {
        content: response.content,
        conditioningTime: 0,
        metadata: {
            conditioningApplied: false,
            originalLength: response.content.length,
            finalLength: response.content.length,
        },
    };
}
// ============================================================================
// HELPER FUNCTIONS FOR RESULT ASSEMBLY
// ============================================================================
/**
 * Type guard to check if scores are JudgeScores (not MultiJudgeScores)
 *
 * @param scores - Scores object to inspect
 * @returns `true` when the object has no `judges` property
 */
function isJudgeScores(scores) {
    return !("judges" in scores);
}
/**
 * Convert MultiJudgeScores to JudgeScores format for WorkflowResult
 *
 * NOTE(review): `synthesizedResponse` is not copied here, so callers that
 * check it on the converted object always see `undefined` - confirm whether
 * multi-judge results can carry a synthesized response.
 *
 * @param scores - Multi-judge scores object
 * @returns A new object with the single-judge field set; provider/model
 *          default to "multi-judge" / "consensus" when absent
 */
function convertToJudgeScores(scores) {
    return {
        judgeProvider: scores.judgeProvider || "multi-judge",
        judgeModel: scores.judgeModel || "consensus",
        scores: scores.scores,
        ranking: scores.ranking,
        bestResponse: scores.bestResponse,
        criteria: scores.criteria,
        reasoning: scores.reasoning,
        confidenceInJudgment: scores.confidenceInJudgment,
        judgeTime: scores.judgeTime,
        timestamp: scores.timestamp,
    };
}
/**
 * Extract best score from judge result
 *
 * @param scores - Judge scores object
 * @param bestResponse - The selected response
 * @param responses - All model responses
 * @returns The score of `bestResponse` as resolved by `getResponseScore`
 */
function extractScore(scores, bestResponse, responses) {
    return getResponseScore(bestResponse, scores, responses);
}
/**
 * Extract reasoning from judge result
 *
 * @param scores - Judge scores object
 * @returns `scores.reasoning`, or "No reasoning provided" when it is falsy
 */
function extractReasoning(scores) {
    return scores.reasoning || "No reasoning provided";
}
/**
 * Extract confidence from judge result
 *
 * Uses `??` rather than `||` so that an explicit confidence of `0` is
 * reported as 0 instead of being replaced by the 0.5 default.
 *
 * @param scores - Judge scores object
 * @returns `scores.confidenceInJudgment`, or 0.5 when it is null/undefined
 */
function extractConfidence(scores) {
    return scores.confidenceInJudgment ?? 0.5;
}
/**
 * Extract consensus level from scores
 *
 * @param scores - Judge scores object
 * @returns `scores.consensusLevel` when that property exists, otherwise `undefined`
 */
function extractConsensus(scores) {
    if ("consensusLevel" in scores) {
        return scores.consensusLevel;
    }
    return undefined;
}
/**
 * Calculate total input tokens
 *
 * @param responses - Model responses; a missing `usage.inputTokens` counts as 0
 * @returns Sum of `usage.inputTokens` across all responses
 */
function calculateInputTokens(responses) {
    return responses.reduce((sum, r) => sum + (r.usage?.inputTokens || 0), 0);
}
/**
 * Calculate total output tokens
 *
 * @param responses - Model responses; a missing `usage.outputTokens` counts as 0
 * @returns Sum of `usage.outputTokens` across all responses
 */
function calculateOutputTokens(responses) {
    return responses.reduce((sum, r) => sum + (r.usage?.outputTokens || 0), 0);
}
/**
 * Calculate total tokens
 *
 * @param responses - Model responses; a missing `usage.totalTokens` counts as 0
 * @returns Sum of `usage.totalTokens` across all responses
 */
function calculateTotalTokens(responses) {
    return responses.reduce((sum, r) => sum + (r.usage?.totalTokens || 0), 0);
}
/**
 * Execute workflow with progressive streaming support
 * Yields preliminary response (first completed model) and final synthesized response
 *
 * @param config - Validated workflow configuration
 * @param options - Execution options with streaming enabled
 * @returns AsyncGenerator yielding preliminary and final responses
 *
 * @example
 * ```typescript
 * for await (const chunk of runWorkflowWithStreaming(config, options)) {
 *   if (chunk.type === 'preliminary') {
 *     console.log('Fast response:', chunk.content);
 *   } else {
 *     console.log('Final synthesis:', chunk.content);
 *   }
 * }
 * ```
 */
export async function* runWorkflowWithStreaming(config, options) {
    // Wrap the generator in an active OTel span so Pipeline A captures the
    // streaming workflow end-to-end and Pipeline B (below) inherits its traceId.
    //
    // NOTE(review): the callback only *creates* the inner generator; its body
    // runs later, during `yield*`, after the startActiveSpan callback has
    // returned. Depending on the context manager, the span may therefore not
    // be the active context while the workflow executes - confirm.
    const generator = tracers.workflow.startActiveSpan("neurolink.workflow.run.streaming", {
        attributes: {
            "workflow.name": config.name,
            "workflow.type": config.type,
            "workflow.id": config.id ?? "unknown",
        },
    }, (otelSpan) => runWorkflowStreamingInner(config, options, otelSpan));
    yield* generator;
}
/**
 * Generator body behind `runWorkflowWithStreaming`.
 *
 * Yields a `"preliminary"` chunk (the first successful response with
 * non-empty content, if any) and then a `"final"` chunk carrying the full
 * result. This function owns `otelSpan` and ends it on every exit path:
 * validation failure, success, thrown error, and early termination by the
 * consumer.
 *
 * @param config - Workflow configuration
 * @param options - Execution options (reads `prompt`, `verbose`, `metadata`)
 * @param otelSpan - OTel span started by the caller; ended here
 * @throws {Error} When the configuration is invalid, or rethrows any error
 *         raised during execution (unlike `runWorkflow`, which returns a
 *         failure result)
 */
async function* runWorkflowStreamingInner(config, options, otelSpan) {
    const startTime = Date.now();
    const span = SpanSerializer.createSpan(SpanType.WORKFLOW, "workflow.run.streaming", {
        "workflow.operation": "run.streaming",
        "workflow.name": config.name,
        "workflow.type": config.type,
        "workflow.id": config.id,
    });
    // Validate configuration
    const validation = validateWorkflow(config);
    if (!validation.valid) {
        const errMsg = `Invalid workflow configuration: ${validation.errors.map((err) => err.message).join(", ")}`;
        span.durationMs = Date.now() - startTime;
        const endedSpan = SpanSerializer.endSpan(span, SpanStatus.ERROR, errMsg);
        getMetricsAggregator().recordSpan(endedSpan);
        const err = new Error(errMsg);
        otelSpan.recordException(err);
        otelSpan.setStatus({ code: SpanStatusCode.ERROR, message: errMsg });
        otelSpan.end();
        throw err;
    }
    if (options.verbose) {
        logger.debug(`[WorkflowRunner] Starting streaming workflow: ${config.name}`);
    }
    // eslint-disable-next-line no-useless-assignment -- read in finally block
    let spanEnded = false;
    try {
        // Step 1: Execute models
        const ensembleResult = await executeModels(config, options);
        if (options.verbose) {
            logger.debug(`[WorkflowRunner] Ensemble complete with ${ensembleResult.successCount} successful responses`);
        }
        // Yield preliminary response (first successful model)
        if (ensembleResult.successCount > 0) {
            const firstResponse = ensembleResult.responses.find((r) => r.status === "success" && Boolean(r.content));
            if (firstResponse) {
                if (options.verbose) {
                    logger.debug(`[WorkflowRunner] Yielding preliminary response from ${firstResponse.model}`);
                }
                yield {
                    type: "preliminary",
                    content: firstResponse.content,
                    partialResult: {
                        ensembleResponses: [firstResponse],
                        workflow: config.id,
                        workflowName: config.name,
                    },
                };
            }
        }
        // Step 2: Continue with full workflow execution (judge + synthesis)
        const scoreResult = await scoreResponses(config, ensembleResult.responses, options);
        const bestResponse = selectBestResponse(ensembleResult.responses, scoreResult.scores);
        const originalContent = bestResponse.content;
        // Step 3: Get processed content
        let processedContent;
        let conditioningTime = 0;
        const judgeScores = isJudgeScores(scoreResult.scores)
            ? scoreResult.scores
            : convertToJudgeScores(scoreResult.scores);
        if (judgeScores.synthesizedResponse) {
            processedContent = judgeScores.synthesizedResponse;
            if (options.verbose) {
                logger.debug(`[WorkflowRunner] Using judge-synthesized response`);
            }
        }
        else if (config.conditioning) {
            const conditionedContent = await conditionFinalResponse(bestResponse, scoreResult.scores, config, options, ensembleResult.responses);
            processedContent = conditionedContent.content;
            conditioningTime = conditionedContent.conditioningTime;
            if (options.verbose) {
                logger.debug(`[WorkflowRunner] Using separate conditioning`);
            }
        }
        else {
            processedContent = originalContent;
            if (options.verbose) {
                logger.debug(`[WorkflowRunner] No conditioning applied`);
            }
        }
        const executionTime = Date.now() - startTime;
        const ensembleTime = ensembleResult.totalTime;
        const judgeTime = scoreResult.judgeTime;
        // Yield final synthesized response
        if (options.verbose) {
            logger.debug(`[WorkflowRunner] Yielding final synthesized response`);
        }
        yield {
            type: "final",
            content: processedContent,
            partialResult: {
                content: processedContent,
                originalContent: originalContent,
                score: extractScore(scoreResult.scores, bestResponse, ensembleResult.responses),
                reasoning: extractReasoning(scoreResult.scores),
                ensembleResponses: ensembleResult.responses,
                judgeScores: judgeScores,
                selectedResponse: bestResponse,
                confidence: extractConfidence(scoreResult.scores),
                consensus: extractConsensus(scoreResult.scores),
                totalTime: executionTime,
                ensembleTime,
                judgeTime,
                conditioningTime: conditioningTime,
                workflow: config.id,
                workflowName: config.name,
                workflowVersion: config.version,
                usage: {
                    totalInputTokens: calculateInputTokens(ensembleResult.responses),
                    totalOutputTokens: calculateOutputTokens(ensembleResult.responses),
                    totalTokens: calculateTotalTokens(ensembleResult.responses),
                    byModel: [],
                },
                metadata: options.metadata,
                timestamp: new Date().toISOString(),
            },
        };
        span.durationMs = executionTime;
        const endedSpan = SpanSerializer.endSpan(span, SpanStatus.OK);
        getMetricsAggregator().recordSpan(endedSpan);
        spanEnded = true;
        otelSpan.setStatus({ code: SpanStatusCode.OK });
        otelSpan.end();
    }
    catch (error) {
        spanEnded = true;
        const executionTime = Date.now() - startTime;
        const errorMessage = error instanceof Error ? error.message : String(error);
        span.durationMs = executionTime;
        const endedSpan = SpanSerializer.endSpan(span, SpanStatus.ERROR, errorMessage);
        getMetricsAggregator().recordSpan(endedSpan);
        if (error instanceof Error) {
            otelSpan.recordException(error);
        }
        otelSpan.setStatus({ code: SpanStatusCode.ERROR, message: errorMessage });
        otelSpan.end();
        logger.error(`[WorkflowRunner] Streaming workflow failed`, {
            error: errorMessage,
        });
        throw error;
    }
    finally {
        // Guard against span leak when the consumer breaks out of the async
        // generator early (neither try-success nor catch fires in that case).
        if (!spanEnded) {
            // End the OTel span first so it is closed even if recording the
            // serialized span below were to throw.
            otelSpan.setStatus({ code: SpanStatusCode.OK });
            otelSpan.end();
            // Also close and record the serialized workflow span; otherwise
            // an early-terminated run never reaches the metrics aggregator.
            span.durationMs = Date.now() - startTime;
            const endedSpan = SpanSerializer.endSpan(span, SpanStatus.OK);
            getMetricsAggregator().recordSpan(endedSpan);
        }
    }
}