UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio

453 lines (452 loc) 17.4 kB
/** * workflow/core/ensembleExecutor.ts * Parallel execution engine for ensemble workflows */ import pLimit from "p-limit"; import { AIProviderFactory } from "../../core/factory.js"; import { logger } from "../../utils/logger.js"; import { SpanSerializer, SpanType, SpanStatus, getMetricsAggregator, } from "../../observability/index.js"; import { WorkflowError } from "../../types/index.js"; import { withSpan } from "../../telemetry/withSpan.js"; import { tracers } from "../../telemetry/tracers.js"; const functionTag = "EnsembleExecutor"; // ============================================================================ // EXECUTION FUNCTIONS // ============================================================================ /** * Execute ensemble of models in parallel * @param options - Execution options including prompt and models * @returns Ensemble execution result with all responses and metrics */ export async function executeEnsemble(options) { return withSpan({ name: "neurolink.workflow.ensemble.execute", tracer: tracers.workflow, attributes: { "workflow.model_count": options.models.length, "workflow.parallelism": options.executionConfig?.parallelism ?? 10, }, }, async (otelSpan) => executeEnsembleInner(options, otelSpan)); } async function executeEnsembleInner(options, otelSpan) { const startTime = Date.now(); const { prompt, models, executionConfig, systemPrompt, workflowDefaults } = options; const span = SpanSerializer.createSpan(SpanType.WORKFLOW, "workflow.ensemble", { "workflow.operation": "ensemble", "workflow.model_count": models.length, "workflow.parallelism": executionConfig?.parallelism || 10, }); logger.info(`[${functionTag}] Starting ensemble execution`, { modelCount: models.length, parallelism: executionConfig?.parallelism || 10, }); // Set up concurrency limiter const limit = pLimit(executionConfig?.parallelism || 10); // Create execution promises const executionPromises = models.map((model) => limit(() => executeModel({ model, prompt, systemPrompt, timeout: model.timeout || executionConfig?.modelTimeout || 15000, }, workflowDefaults?.systemPrompt))); // Execute all models const responses = await Promise.all(executionPromises); const totalTime = Date.now() - startTime; const successCount = responses.filter((r) => r.status === "success").length; const failureCount = responses.filter((r) => r.status === "failure").length; // Collect errors const errors = responses .filter((r) => r.status === "failure" && r.error) .map((r) => { const errorMsg = typeof r.error === "string" ? r.error : "Unknown error"; return new WorkflowError(errorMsg, { code: "MODEL_EXECUTION_ERROR", workflowId: "ensemble", phase: "ensemble", retryable: true, }); }); // Check if we met minimum response threshold const minResponses = executionConfig?.minResponses || 1; if (successCount < minResponses) { const error = new WorkflowError(`Insufficient successful responses: ${successCount}/${minResponses} required`, { code: "INSUFFICIENT_RESPONSES", workflowId: "ensemble", phase: "ensemble", retryable: false, }); errors.push(error); } logger.info(`[${functionTag}] Ensemble execution completed`, { totalTime, successCount, failureCount, totalResponses: responses.length, }); span.durationMs = totalTime; const spanStatus = successCount > 0 ? SpanStatus.OK : SpanStatus.ERROR; const endedSpan = SpanSerializer.endSpan(span, spanStatus, successCount === 0 ? "No successful model responses" : undefined); getMetricsAggregator().recordSpan(endedSpan); otelSpan.setAttribute("workflow.success_count", successCount); otelSpan.setAttribute("workflow.failure_count", failureCount); otelSpan.setAttribute("workflow.total_time_ms", totalTime); return { responses, totalTime, successCount, failureCount, errors, }; } /** * Execute a single model with timeout and error handling * @param options - Model execution options * @returns Ensemble response with result or error */ async function executeModel(options, workflowDefaultSystemPrompt) { const { model, prompt, systemPrompt, timeout } = options; const startTime = Date.now(); logger.debug(`[${functionTag}] Executing model`, { provider: model.provider, model: model.model, timeout, }); try { // Create provider instance const provider = await createProvider(model); // Resolve system prompt with hierarchical fallback: // 1. Direct parameter (highest priority) // 2. Model-specific systemPrompt // 3. Workflow-level default // 4. undefined (provider default) const resolvedSystemPrompt = systemPrompt || model.systemPrompt || workflowDefaultSystemPrompt; // Execute with timeout. Some upstream LLM endpoints (notably Vertex // under high parallel load) occasionally respond with an empty // assistant message instead of content; retry once before giving up so // a transient empty response doesn't cascade into an empty ensemble // result that downstream tests + judges have no way to evaluate. let result; let attempts = 0; const MAX_ATTEMPTS = 2; /* eslint-disable no-constant-condition */ while (true) { attempts++; result = await executeWithTimeout(async () => { return await provider.generate({ prompt, systemPrompt: resolvedSystemPrompt, temperature: model.temperature, maxTokens: model.maxTokens, }); }, timeout, `Model ${model.provider}/${model.model} timed out after ${timeout}ms`); const contentLen = result?.content?.length ?? 0; if (contentLen > 0 || attempts >= MAX_ATTEMPTS) { break; } logger.warn(`[${functionTag}] Model returned empty content — retrying once`, { provider: model.provider, model: model.model, attempt: attempts }); } /* eslint-enable no-constant-condition */ const responseTime = Date.now() - startTime; logger.debug(`[${functionTag}] Model execution successful`, { provider: model.provider, model: model.model, responseTime, contentLength: result?.content?.length || 0, }); return { provider: model.provider, model: model.model, modelLabel: model.label, content: result?.content || "", status: "success", responseTime, usage: result?.usage ? { inputTokens: result.usage.input, outputTokens: result.usage.output, totalTokens: result.usage.total, } : undefined, metadata: model.metadata, timestamp: new Date().toISOString(), }; } catch (error) { const responseTime = Date.now() - startTime; const err = error; logger.warn(`[${functionTag}] Model execution failed`, { provider: model.provider, model: model.model, error: err.message, responseTime, }); return { provider: model.provider, model: model.model, modelLabel: model.label, content: "", status: "failure", responseTime, error: err.message, timestamp: new Date().toISOString(), }; } } /** * Create provider instance from model config * @param model - Model configuration * @returns Provider instance */ async function createProvider(model) { return await AIProviderFactory.createProvider(model.provider, model.model); } /** * Execute function with timeout * @param fn - Async function to execute * @param timeoutMs - Timeout in milliseconds * @param timeoutMessage - Error message for timeout * @returns Result of function execution */ async function executeWithTimeout(fn, timeoutMs, timeoutMessage) { let timeoutHandle; const timeoutPromise = new Promise((_, reject) => { timeoutHandle = setTimeout(() => { const error = new WorkflowError(timeoutMessage, { code: "TIMEOUT", workflowId: "ensemble", phase: "ensemble", retryable: true, }); reject(error); }, timeoutMs); }); try { const result = await Promise.race([fn(), timeoutPromise]); if (timeoutHandle) { clearTimeout(timeoutHandle); } return result; } catch (error) { if (timeoutHandle) { clearTimeout(timeoutHandle); } throw error; } } /** * Filter successful responses from ensemble result * @param responses - Array of ensemble responses * @returns Array of successful responses */ export function getSuccessfulResponses(responses) { return responses.filter((r) => r.status === "success"); } /** * Get response by model identifier * @param responses - Array of ensemble responses * @param provider - Provider name * @param model - Model name * @returns Response for specified model or undefined */ export function getResponseByModel(responses, provider, model) { return responses.find((r) => r.provider === provider && r.model === model); } /** * Calculate total tokens used across all responses * @param responses - Array of ensemble responses * @returns Total token count */ export function calculateTotalTokens(responses) { return responses.reduce((total, response) => { if (response.usage?.totalTokens) { return total + response.usage.totalTokens; } return total; }, 0); } /** * Sort responses by response time * @param responses - Array of ensemble responses * @param ascending - Sort in ascending order (default: true) * @returns Sorted array of responses */ export function sortByResponseTime(responses, ascending = true) { return [...responses].sort((a, b) => { return ascending ? a.responseTime - b.responseTime : b.responseTime - a.responseTime; }); } /** * Get fastest successful response * @param responses - Array of ensemble responses * @returns Fastest successful response or undefined */ export function getFastestResponse(responses) { const successful = getSuccessfulResponses(responses); if (successful.length === 0) { return undefined; } return sortByResponseTime(successful, true)[0]; } // ============================================================================ // LAYER-BASED EXECUTION (for ModelGroups) // ============================================================================ /** * Execute model groups sequentially (layers) * Each group executes according to its executionStrategy * @param groups - Array of model groups to execute * @param prompt - User prompt * @param executionConfig - Execution configuration * @param systemPrompt - Direct system prompt override * @param workflowDefaultSystemPrompt - Workflow-level default system prompt * @returns Ensemble execution result with all responses */ export async function executeModelGroups(groups, prompt, _executionConfig, systemPrompt, workflowDefaultSystemPrompt) { return withSpan({ name: "neurolink.workflow.layers.execute", tracer: tracers.workflow, attributes: { "workflow.group_count": groups.length, "workflow.total_models": groups.reduce((n, g) => n + g.models.length, 0), }, }, async (otelSpan) => executeModelGroupsInner(groups, prompt, _executionConfig, systemPrompt, workflowDefaultSystemPrompt, otelSpan)); } async function executeModelGroupsInner(groups, prompt, _executionConfig, systemPrompt, workflowDefaultSystemPrompt, otelSpan) { const startTime = Date.now(); const allResponses = []; const allErrors = []; let totalSuccessCount = 0; let totalFailureCount = 0; logger.info(`[${functionTag}] Executing ${groups.length} model groups sequentially`); // Execute groups sequentially for (const group of groups) { logger.info(`[${functionTag}] Executing group: ${group.id}`, { groupName: group.name, modelCount: group.models.length, strategy: group.executionStrategy, }); const layerResult = await executeLayer({ group, prompt, systemPrompt, workflowDefaultSystemPrompt, }); // Collect results allResponses.push(...layerResult.responses); totalSuccessCount += layerResult.successCount; totalFailureCount += layerResult.failureCount; // Check if we should continue to next group if (!layerResult.shouldContinue) { logger.warn(`[${functionTag}] Stopping execution after group: ${group.id}`, { reason: `Failed to meet minSuccessful threshold (${group.minSuccessful || 1})`, }); // Add error for incomplete execution if (group.continueOnFailure === false) { const error = new WorkflowError(`Group ${group.id} failed to meet minSuccessful threshold`, { code: "GROUP_EXECUTION_FAILED", workflowId: "ensemble", phase: "ensemble", retryable: false, details: { groupId: group.id, successCount: layerResult.successCount, minRequired: group.minSuccessful || 1, }, }); allErrors.push(error); } break; } } const totalTime = Date.now() - startTime; logger.info(`[${functionTag}] Model groups execution completed`, { totalTime, totalResponses: allResponses.length, successCount: totalSuccessCount, failureCount: totalFailureCount, }); otelSpan.setAttribute("workflow.success_count", totalSuccessCount); otelSpan.setAttribute("workflow.failure_count", totalFailureCount); otelSpan.setAttribute("workflow.total_time_ms", totalTime); return { responses: allResponses, totalTime, successCount: totalSuccessCount, failureCount: totalFailureCount, errors: allErrors, }; } /** * Execute a single layer/group of models * @param options - Layer execution options * @returns Layer execution result */ async function executeLayer(options) { const startTime = Date.now(); const { group, prompt, systemPrompt, workflowDefaultSystemPrompt } = options; if (group.executionStrategy === "parallel") { // Execute all models in parallel const result = await executeEnsemble({ prompt, models: group.models, executionConfig: { parallelism: group.parallelism || 10, modelTimeout: group.timeout, }, systemPrompt, workflowDefaults: { systemPrompt: workflowDefaultSystemPrompt, }, }); const shouldContinue = result.successCount >= (group.minSuccessful || 1) || group.continueOnFailure !== false; return { groupId: group.id, responses: result.responses, successCount: result.successCount, failureCount: result.failureCount, executionTime: Date.now() - startTime, shouldContinue, }; } else { // Execute models sequentially (one after another) const responses = []; let successCount = 0; let failureCount = 0; for (const model of group.models) { const response = await executeModel({ model, prompt, systemPrompt, timeout: model.timeout || group.timeout || 15000, }, workflowDefaultSystemPrompt); responses.push(response); if (response.status === "success") { successCount++; } else { failureCount++; } logger.debug(`[${functionTag}] Sequential execution progress`, { groupId: group.id, model: `${model.provider}/${model.model}`, status: response.status, successCount, failureCount, }); } const shouldContinue = successCount >= (group.minSuccessful || 1) || group.continueOnFailure !== false; return { groupId: group.id, responses, successCount, failureCount, executionTime: Date.now() - startTime, shouldContinue, }; } }