UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications…

271 lines (270 loc) 10.7 kB
/**
 * @file BatchEvaluator - Supports batch evaluation of multiple responses.
 * Enables parallel evaluation with configurable concurrency and error handling.
 */
import { Evaluator } from "./index.js";
import {
  createBatchEvaluationError,
  isRetryableEvaluationError,
} from "./errors/EvaluationError.js";
import { logger } from "../utils/logger.js";
import { NeuroLinkFeatureError } from "../core/infrastructure/index.js";

/** Fallback number of items evaluated per batch. */
const DEFAULT_CONCURRENCY = 5;
/** Fallback number of retries after the first attempt. */
const DEFAULT_MAX_RETRIES = 2;
/** Fallback pause between retries, in milliseconds. */
const DEFAULT_RETRY_DELAY_MS = 1000;
/** Fallback passing threshold when none is configured anywhere. */
const DEFAULT_THRESHOLD = 7;

/**
 * Narrows a batch item result to one that carries evaluation data.
 *
 * @param result - A per-item result produced by `evaluateBatch`
 * @returns `true` when the item succeeded and `data` is defined
 */
function hasEvaluationData(result) {
  return result.success && result.data !== undefined;
}

/**
 * Normalizes the configured concurrency into a usable batch size.
 *
 * Zero, negative, NaN and non-number values fall back to the default: a
 * non-positive step would keep the batch loop from ever advancing. `Infinity`
 * is kept as-is and means "evaluate everything in a single batch".
 *
 * @param value - The raw `concurrency` option
 * @returns A positive integer, or `Infinity`
 */
function resolveConcurrency(value) {
  if (typeof value !== "number" || Number.isNaN(value) || value < 1) {
    return DEFAULT_CONCURRENCY;
  }
  return value === Infinity ? value : Math.floor(value);
}

/**
 * BatchEvaluator - Performs evaluation on multiple items in parallel.
 * Supports configurable concurrency, retry logic, and progress tracking.
 *
 * @example
 * ```typescript
 * const batchEvaluator = new BatchEvaluator({
 *   concurrency: 3,
 *   continueOnError: true,
 *   onProgress: (progress) => console.log(`${progress.percentComplete}% complete`)
 * });
 *
 * const items = [
 *   { id: '1', options: opts1, result: result1 },
 *   { id: '2', options: opts2, result: result2 },
 * ];
 *
 * const batchResult = await batchEvaluator.evaluateBatch(items);
 * console.log(`Passing rate: ${batchResult.summary.passingRate}%`);
 * ```
 */
export class BatchEvaluator {
  config;

  /**
   * @param config - Batch options; any key given here overrides the defaults
   *   (`concurrency: 5`, `continueOnError: true`, `maxRetries: 2`,
   *   `retryDelay: 1000`).
   */
  constructor(config = {}) {
    this.config = {
      concurrency: DEFAULT_CONCURRENCY,
      continueOnError: true,
      maxRetries: DEFAULT_MAX_RETRIES,
      retryDelay: DEFAULT_RETRY_DELAY_MS,
      ...config,
    };
  }

  /**
   * Create a fresh Evaluator instance for each evaluation to avoid leaking state.
   */
  _createEvaluator() {
    return new Evaluator(this.config);
  }

  /**
   * Evaluates a batch of items in parallel with controlled concurrency.
   *
   * Items are processed in consecutive slices of `concurrency` items; the next
   * slice starts only after every item of the current slice has settled.
   *
   * @param items - Array of items to evaluate
   * @param autoEvalConfig - Auto-evaluation configuration for thresholds
   * @returns Batch evaluation results with summary statistics
   * @throws The error built by `createBatchEvaluationError` when
   *   `continueOnError` is false and at least one item of a slice failed
   */
  async evaluateBatch(items, autoEvalConfig = {}) {
    const startTime = Date.now();
    const results = [];
    const concurrency = resolveConcurrency(this.config.concurrency);
    // `??` rather than `||`: an explicit 0 ("never retry" / "retry at once")
    // is a legitimate setting and must not be replaced by the default.
    const maxRetries = Math.max(
      0,
      this.config.maxRetries ?? DEFAULT_MAX_RETRIES,
    );
    const retryDelay = this.config.retryDelay ?? DEFAULT_RETRY_DELAY_MS;

    // Track progress
    let completed = 0;
    let succeeded = 0;
    let failed = 0;

    // Describes a value thrown by a user callback for the log payload.
    const describeError = (thrown) =>
      thrown instanceof Error ? thrown.message : String(thrown);

    // A throwing progress callback is logged and must never fail the batch.
    const reportProgress = () => {
      if (!this.config.onProgress) {
        return;
      }
      try {
        this.config.onProgress({
          total: items.length,
          completed,
          succeeded,
          failed,
          pending: items.length - completed,
          percentComplete: Math.round((completed / items.length) * 100),
        });
      } catch (callbackError) {
        logger.warn("[BatchEvaluator] onProgress callback threw an error", {
          error: describeError(callbackError),
        });
      }
    };

    // Same isolation for the per-item callback (success and failure paths).
    const notifyItemComplete = (itemResult) => {
      if (!this.config.onItemComplete) {
        return;
      }
      try {
        this.config.onItemComplete(itemResult);
      } catch (callbackError) {
        logger.warn("[BatchEvaluator] onItemComplete callback threw an error", {
          error: describeError(callbackError),
        });
      }
    };

    // Evaluates one item, retrying retryable failures up to `maxRetries` times.
    const processItem = async (item) => {
      const itemStartTime = Date.now();
      let retryCount = 0;
      let lastError;

      while (retryCount <= maxRetries) {
        try {
          const threshold =
            item.threshold ??
            autoEvalConfig.threshold ??
            this.config.threshold ??
            DEFAULT_THRESHOLD;
          // Create fresh evaluator per attempt to avoid leaking state
          const evaluator = this._createEvaluator();
          const data = await evaluator.evaluate(
            item.options,
            item.result,
            threshold,
            { ...autoEvalConfig, threshold },
          );
          const result = {
            id: item.id,
            success: true,
            data,
            duration: Date.now() - itemStartTime,
            retryCount,
          };
          succeeded++;
          completed++;
          reportProgress();
          notifyItemComplete(result);
          return result;
        } catch (error) {
          lastError = error;
          // Check if error is retryable
          const isRetryable =
            error instanceof NeuroLinkFeatureError &&
            isRetryableEvaluationError(error);
          if (isRetryable && retryCount < maxRetries) {
            retryCount++;
            logger.debug(
              `[BatchEvaluator.evaluateBatch] Retrying evaluation for item ${item.id}`,
              { attempt: retryCount + 1, itemId: item.id },
            );
            await this.delay(retryDelay);
            continue;
          }
          // Not retryable or max retries exceeded
          break;
        }
      }

      // Failed after all retries
      const isFeatureError = lastError instanceof NeuroLinkFeatureError;
      const errorResult = {
        id: item.id,
        success: false,
        error: {
          message: lastError?.message || "Unknown error",
          code: isFeatureError ? lastError.code : undefined,
          retryable: isFeatureError ? lastError.retryable : false,
        },
        duration: Date.now() - itemStartTime,
        retryCount,
      };
      failed++;
      completed++;
      reportProgress();
      notifyItemComplete(errorResult);
      if (!this.config.continueOnError) {
        throw lastError;
      }
      return errorResult;
    };

    // Process items in batches based on concurrency
    for (let i = 0; i < items.length; i += concurrency) {
      const batch = items.slice(i, i + concurrency);
      const settled = await Promise.allSettled(
        batch.map((item) => processItem(item)),
      );
      const rejectedItems = [];
      settled.forEach((outcome, offset) => {
        if (outcome.status === "fulfilled") {
          results.push(outcome.value);
          return;
        }
        // With continueOnError disabled a failed item rejects instead of
        // returning an error result, so the failure details exist only on the
        // rejection. Record them with the item's position in `items`.
        const { reason } = outcome;
        rejectedItems.push({
          index: i + offset,
          error:
            reason instanceof Error
              ? reason
              : new Error(String(reason ?? "Unknown error")),
        });
      });
      // If continueOnError is false and any item in this batch was rejected, throw aggregate
      if (!this.config.continueOnError && rejectedItems.length > 0) {
        throw createBatchEvaluationError(
          rejectedItems.length,
          items.length,
          rejectedItems,
        );
      }
    }

    // Calculate summary statistics
    // NOTE(review): passingRate compares every score against the batch-level
    // threshold; a per-item `item.threshold` is not taken into account here.
    const summaryThreshold =
      autoEvalConfig.threshold ?? this.config.threshold ?? DEFAULT_THRESHOLD;
    const successfulResults = results.filter(hasEvaluationData);
    const scores = successfulResults.map((r) => r.data.overall);
    const passingScores = successfulResults.filter(
      (r) => r.data.overall >= summaryThreshold,
    );
    const summary = {
      total: items.length,
      succeeded,
      failed,
      averageScore:
        scores.length > 0
          ? scores.reduce((a, b) => a + b, 0) / scores.length
          : 0,
      averageDuration:
        results.length > 0
          ? results.reduce((a, b) => a + b.duration, 0) / results.length
          : 0,
      totalDuration: Date.now() - startTime,
      passingRate:
        successfulResults.length > 0
          ? (passingScores.length / successfulResults.length) * 100
          : 0,
    };
    return {
      results,
      summary,
      allSucceeded: failed === 0,
    };
  }

  /**
   * Evaluates items sequentially (one at a time).
   * Useful for debugging or when order matters.
   *
   * @param items - Array of items to evaluate
   * @param autoEvalConfig - Auto-evaluation configuration
   * @returns Batch evaluation results
   */
  async evaluateSequential(items, autoEvalConfig = {}) {
    // Create a temporary evaluator with sequential config to avoid mutating shared state
    const sequentialEvaluator = new BatchEvaluator({
      ...this.config,
      concurrency: 1,
    });
    return sequentialEvaluator.evaluateBatch(items, autoEvalConfig);
  }

  /**
   * Gets the current configuration.
   *
   * @returns A shallow copy of the configuration object
   */
  getConfig() {
    return { ...this.config };
  }

  /**
   * Updates the configuration.
   *
   * @param config - New configuration values
   */
  updateConfig(config) {
    this.config = { ...this.config, ...config };
    // Fresh evaluators are created per evaluation via _createEvaluator(),
    // so no shared evaluator needs to be re-created here.
  }

  /**
   * Helper to delay execution.
   *
   * @param ms - Time to wait in milliseconds
   * @returns A promise that resolves once the timer fires
   */
  delay(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }
}