@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.
271 lines (270 loc) • 10.7 kB
JavaScript
/**
* @file BatchEvaluator - Supports batch evaluation of multiple responses.
* Enables parallel evaluation with configurable concurrency and error handling.
*/
import { Evaluator } from "./index.js";
import { createBatchEvaluationError, isRetryableEvaluationError, } from "./errors/EvaluationError.js";
import { logger } from "../utils/logger.js";
import { NeuroLinkFeatureError } from "../core/infrastructure/index.js";
/**
 * Predicate used to pick out batch results that completed successfully and
 * actually carry evaluation data.
 *
 * @param result - A single batch result entry
 * @returns The falsy `success` value when the entry did not succeed, otherwise
 *          `true` only if `data` is not `undefined`
 */
function hasEvaluationData(result) {
    const succeeded = result.success;
    if (!succeeded) {
        // Mirror `a && b`: hand back the falsy left operand untouched.
        return succeeded;
    }
    return result.data !== undefined;
}
/**
* BatchEvaluator - Performs evaluation on multiple items in parallel.
* Supports configurable concurrency, retry logic, and progress tracking.
*
* @example
* ```typescript
* const batchEvaluator = new BatchEvaluator({
* concurrency: 3,
* continueOnError: true,
* onProgress: (progress) => console.log(`${progress.percentComplete}% complete`)
* });
*
* const items = [
* { id: '1', options: opts1, result: result1 },
* { id: '2', options: opts2, result: result2 },
* ];
*
* const batchResult = await batchEvaluator.evaluateBatch(items);
* console.log(`Passing rate: ${batchResult.summary.passingRate}%`);
* ```
*/
export class BatchEvaluator {
    /** Effective configuration: built-in defaults overlaid with caller-supplied values. */
    config;
    /**
     * @param config - Optional overrides for the defaults
     *                 (`concurrency: 5`, `continueOnError: true`,
     *                 `maxRetries: 2`, `retryDelay: 1000`).
     */
    constructor(config = {}) {
        this.config = {
            concurrency: 5,
            continueOnError: true,
            maxRetries: 2,
            retryDelay: 1000,
            ...config,
        };
    }
    /**
     * Create a fresh Evaluator instance for each evaluation to avoid leaking state.
     */
    _createEvaluator() {
        return new Evaluator(this.config);
    }
    /**
     * Evaluates a batch of items in parallel with controlled concurrency.
     *
     * Items are processed in consecutive slices of `concurrency` items; a slice
     * is awaited in full before the next one starts. Results keep item order.
     *
     * @param items - Array of items to evaluate
     * @param autoEvalConfig - Auto-evaluation configuration for thresholds
     * @returns Batch evaluation results with summary statistics
     * @throws The error built by `createBatchEvaluationError` when
     *         `continueOnError` is false and at least one item of a slice failed
     */
    async evaluateBatch(items, autoEvalConfig = {}) {
        const startTime = Date.now();
        const results = [];
        // Accept a numeric setting only when it is a number >= minimum; NaN,
        // non-numbers and out-of-range values fall back to the default. This keeps
        // an explicit 0 for maxRetries/retryDelay (previously swallowed by `||`)
        // and stops a negative concurrency from looping forever.
        const numberAtLeast = (value, minimum, fallback) => typeof value === "number" && value >= minimum ? value : fallback;
        const concurrency = Math.floor(numberAtLeast(this.config.concurrency, 1, 5));
        const maxRetries = Math.floor(numberAtLeast(this.config.maxRetries, 0, 2));
        const retryDelay = numberAtLeast(this.config.retryDelay, 0, 1000);
        // Threshold actually used for each successful result, so the summary's
        // passing rate honours per-item threshold overrides.
        const thresholdByResult = new Map();
        // Track progress
        let completed = 0;
        let succeeded = 0;
        let failed = 0;
        const describeError = (error) => (error instanceof Error ? error.message : String(error));
        // User callbacks are best-effort: a throwing callback is logged, never fatal.
        const reportProgress = () => {
            if (!this.config.onProgress) {
                return;
            }
            try {
                this.config.onProgress({
                    total: items.length,
                    completed,
                    succeeded,
                    failed,
                    pending: items.length - completed,
                    percentComplete: Math.round((completed / items.length) * 100),
                });
            }
            catch (callbackError) {
                logger.warn("[BatchEvaluator] onProgress callback threw an error", {
                    error: describeError(callbackError),
                });
            }
        };
        const notifyItemComplete = (itemResult) => {
            if (!this.config.onItemComplete) {
                return;
            }
            try {
                this.config.onItemComplete(itemResult);
            }
            catch (callbackError) {
                logger.warn("[BatchEvaluator] onItemComplete callback threw an error", {
                    error: describeError(callbackError),
                });
            }
        };
        // Evaluate one item, retrying retryable failures up to maxRetries times.
        const processItem = async (item) => {
            const itemStartTime = Date.now();
            let retryCount = 0;
            let lastError;
            while (retryCount <= maxRetries) {
                try {
                    // Falsy thresholds (including 0) fall through to the next source.
                    const threshold = item.threshold ||
                        autoEvalConfig.threshold ||
                        this.config.threshold ||
                        7;
                    // Create fresh evaluator per attempt to avoid leaking state
                    const evaluator = this._createEvaluator();
                    const data = await evaluator.evaluate(item.options, item.result, threshold, {
                        ...autoEvalConfig,
                        threshold,
                    });
                    const result = {
                        id: item.id,
                        success: true,
                        data,
                        duration: Date.now() - itemStartTime,
                        retryCount,
                    };
                    thresholdByResult.set(result, threshold);
                    succeeded++;
                    completed++;
                    reportProgress();
                    notifyItemComplete(result);
                    return result;
                }
                catch (error) {
                    lastError = error;
                    const isRetryable = error instanceof NeuroLinkFeatureError &&
                        isRetryableEvaluationError(error);
                    if (!isRetryable || retryCount >= maxRetries) {
                        // Not retryable or max retries exceeded
                        break;
                    }
                    retryCount++;
                    logger.debug(`[BatchEvaluator.evaluateBatch] Retrying evaluation for item ${item.id}`, { attempt: retryCount + 1, itemId: item.id });
                    await this.delay(retryDelay);
                }
            }
            // Failed after all retries
            const isFeatureError = lastError instanceof NeuroLinkFeatureError;
            const errorResult = {
                id: item.id,
                success: false,
                error: {
                    message: lastError?.message || "Unknown error",
                    code: isFeatureError ? lastError.code : undefined,
                    retryable: isFeatureError ? lastError.retryable : false,
                },
                duration: Date.now() - itemStartTime,
                retryCount,
            };
            failed++;
            completed++;
            reportProgress();
            notifyItemComplete(errorResult);
            if (!this.config.continueOnError) {
                // Surfaces as a rejected entry in the slice's allSettled outcome.
                throw lastError;
            }
            return errorResult;
        };
        // Process items in slices based on concurrency
        for (let offset = 0; offset < items.length; offset += concurrency) {
            const batch = items.slice(offset, offset + concurrency);
            const settled = await Promise.allSettled(batch.map((item) => processItem(item)));
            // Rejected entries never reach `results`, so failures are collected from
            // the settled outcomes directly, keyed by the item's position in `items`.
            const failedItems = [];
            settled.forEach((outcome, position) => {
                if (outcome.status === "fulfilled") {
                    results.push(outcome.value);
                    return;
                }
                const reason = outcome.reason;
                failedItems.push({
                    index: offset + position,
                    error: reason instanceof Error
                        ? reason
                        : new Error(reason === undefined ? "Unknown error" : String(reason)),
                });
            });
            // If continueOnError is false and any item in this slice was rejected, throw aggregate
            if (!this.config.continueOnError && failedItems.length > 0) {
                throw createBatchEvaluationError(failedItems.length, items.length, failedItems);
            }
        }
        // Calculate summary statistics
        const successfulResults = results.filter(hasEvaluationData);
        const scoreTotal = successfulResults.reduce((sum, r) => sum + r.data.overall, 0);
        const passingCount = successfulResults.filter((r) => r.data.overall >= thresholdByResult.get(r)).length;
        const durationTotal = results.reduce((sum, r) => sum + r.duration, 0);
        const summary = {
            total: items.length,
            succeeded,
            failed,
            averageScore: successfulResults.length > 0 ? scoreTotal / successfulResults.length : 0,
            averageDuration: results.length > 0 ? durationTotal / results.length : 0,
            totalDuration: Date.now() - startTime,
            passingRate: successfulResults.length > 0
                ? (passingCount / successfulResults.length) * 100
                : 0,
        };
        return {
            results,
            summary,
            allSucceeded: failed === 0,
        };
    }
    /**
     * Evaluates items sequentially (one at a time).
     * Useful for debugging or when order matters.
     *
     * @param items - Array of items to evaluate
     * @param autoEvalConfig - Auto-evaluation configuration
     * @returns Batch evaluation results
     */
    async evaluateSequential(items, autoEvalConfig = {}) {
        // Create a temporary evaluator with sequential config to avoid mutating shared state
        const sequentialEvaluator = new BatchEvaluator({
            ...this.config,
            concurrency: 1,
        });
        return sequentialEvaluator.evaluateBatch(items, autoEvalConfig);
    }
    /**
     * Gets the current configuration.
     *
     * @returns A shallow copy, so top-level edits by the caller do not affect this instance
     */
    getConfig() {
        return { ...this.config };
    }
    /**
     * Updates the configuration.
     *
     * @param config - New configuration values, merged over the current ones
     */
    updateConfig(config) {
        this.config = { ...this.config, ...config };
        // Fresh evaluators are created per evaluation via _createEvaluator(),
        // so no shared evaluator needs to be re-created here.
    }
    /**
     * Helper to delay execution.
     *
     * @param ms - Milliseconds to wait
     * @returns Promise that resolves once the timer fires
     */
    delay(ms) {
        return new Promise((resolve) => setTimeout(resolve, ms));
    }
}