@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.
397 lines (396 loc) • 15 kB
JavaScript
/**
* @file Evaluation Pipeline
* Multi-scorer orchestration with configurable execution
*/
import { logger } from "../../utils/logger.js";
import { ErrorFactory, withTimeout } from "../../utils/errorHandling.js";
import { DEFAULT_SCORE_SCALE } from "../scorers/baseScorer.js";
import { ScorerRegistry } from "../scorers/scorerRegistry.js";
/**
 * Evaluation Pipeline for running multiple scorers.
 *
 * Loads the scorers named in the configuration from `ScorerRegistry`, runs them
 * against an input (in parallel or sequentially), and aggregates the individual
 * scores into a single overall score and pass/fail verdict.
 *
 * Scorers are stored under their canonical ID (`scorer.metadata.id`). The ID used
 * in the configuration may differ from it (an alias); the pipeline remembers that
 * mapping so aliases keep working in `requiredScorers`, aggregation weights,
 * `onlyScorers` / `skipScorers`, `getScorer()` and `removeScorer()`.
 */
export class EvaluationPipeline {
    /** Effective configuration: constructor defaults overlaid with the caller's values. */
    _config;
    /** Scorer instances keyed by canonical ID (or by the ID passed to `addScorer`). */
    _scorers = new Map();
    /** Configured scorer ID (possibly an alias) -> canonical ID the scorer is stored under. */
    _aliases = new Map();
    /** True once `initialize()` has completed successfully. */
    _initialized = false;
    /**
     * @param config - Pipeline configuration. Defaults applied when absent:
     *   `executionMode: "parallel"`, `stopOnFailure: false`, `passThreshold: 0.7`,
     *   `scorers: []`.
     */
    constructor(config) {
        this._config = {
            executionMode: "parallel",
            stopOnFailure: false,
            passThreshold: 0.7,
            ...config,
            // Guarantee an array so initialize()/addScorer() also work for a
            // configuration that declares no scorers.
            scorers: config?.scorers ?? [],
        };
    }
    /**
     * Get pipeline configuration
     */
    get config() {
        return this._config;
    }
    /**
     * Check if pipeline is initialized
     */
    get initialized() {
        return this._initialized;
    }
    /**
     * Initialize the pipeline by loading all configured scorers.
     *
     * Idempotent: returns immediately once initialization has succeeded. Scorers
     * that cannot be found or fail to load are logged and skipped; only a missing
     * *required* scorer aborts initialization.
     *
     * @throws {Error} When an entry of `requiredScorers` could not be loaded.
     */
    async initialize() {
        if (this._initialized) {
            return;
        }
        logger.debug(`Initializing evaluation pipeline: ${this._config.name ?? "unnamed"}`);
        // Initialize registry
        await ScorerRegistry.registerBuiltInScorers();
        // IDs already present were registered through addScorer(); keep those
        // instances instead of replacing them with (or failing to find) a
        // registry scorer of the same ID.
        const preRegistered = new Set(this._scorers.keys());
        // Load all configured scorers using canonical IDs as map keys
        for (const scorerDef of this._config.scorers) {
            if (preRegistered.has(scorerDef.id)) {
                continue;
            }
            try {
                const scorer = await ScorerRegistry.getScorer(scorerDef.id, scorerDef.config);
                if (scorer) {
                    const canonicalId = scorer.metadata.id;
                    this._scorers.set(canonicalId, scorer);
                    // Remember which configured ID produced this scorer so that
                    // alias references can be resolved later.
                    this._aliases.set(scorerDef.id, canonicalId);
                    logger.debug(`Loaded scorer: ${scorerDef.id} (canonical: ${canonicalId})`);
                }
                else {
                    logger.warn(`Scorer not found: ${scorerDef.id}`);
                }
            }
            catch (error) {
                logger.error(`Failed to load scorer: ${scorerDef.id}`, {
                    error: error instanceof Error ? error.message : String(error),
                });
            }
        }
        if (this._config.requiredScorers) {
            // Normalize requiredScorers to canonical IDs (aliases included)
            this._config.requiredScorers = this._config.requiredScorers.map((id) => this._resolveScorerId(id));
            // Validate required scorers are actually loaded
            const missing = this._config.requiredScorers.filter((id) => !this._scorers.has(id));
            if (missing.length > 0) {
                throw new Error(`Required scorers could not be loaded: ${missing.join(", ")}`);
            }
        }
        this._initialized = true;
        logger.debug(`Pipeline initialized with ${this._scorers.size} scorers`);
    }
    /**
     * Execute the pipeline on input.
     *
     * Initializes the pipeline first if necessary. In `"parallel"` mode all
     * selected scorers run concurrently; in any other mode they run one after
     * another and `stopOnFailure` ends the run at the first scorer that does not
     * pass.
     *
     * @param input - Value handed unchanged to every scorer's `score()` method.
     * @param options - Optional per-run settings read here: `correlationId`,
     *   `timeout`, `onlyScorers`, `skipScorers`.
     * @returns Result object with the individual `scores`, the aggregated
     *   `overallScore`, the `passed` verdict, collected `errors`,
     *   `skippedScorers`, timing data and the configuration/options used.
     * @throws When both `onlyScorers` and `skipScorers` are non-empty, or when
     *   initialization fails.
     */
    async execute(input, options) {
        if (!this._initialized) {
            await this.initialize();
        }
        this._validateExecutionOptions(options);
        const startTime = Date.now();
        const correlationId = options?.correlationId ?? `pipeline-${Date.now()}`;
        logger.debug(`Executing pipeline: ${this._config.name ?? "unnamed"}`, {
            correlationId,
            scorerCount: this._scorers.size,
        });
        // Determine which scorers to run
        const scorersToRun = this._getScorersToRun(options);
        const skippedScorers = this._getSkippedScorers(options);
        // Execute scorers
        const results = [];
        const errors = [];
        if (this._config.executionMode === "parallel") {
            // Parallel execution
            const promises = scorersToRun.map(([id, scorer]) => this._executeScorer(id, scorer, input, options?.timeout));
            const settledResults = await Promise.allSettled(promises);
            settledResults.forEach((settled, index) => {
                const [id] = scorersToRun[index];
                if (settled.status === "fulfilled") {
                    results.push(settled.value);
                    if (settled.value.error) {
                        errors.push({ scorerId: id, error: settled.value.error });
                    }
                }
                else {
                    errors.push({
                        scorerId: id,
                        error: settled.reason?.message ?? "Unknown error",
                    });
                }
            });
        }
        else {
            // Sequential execution
            for (const [id, scorer] of scorersToRun) {
                try {
                    const result = await this._executeScorer(id, scorer, input, options?.timeout);
                    results.push(result);
                    if (result.error) {
                        errors.push({ scorerId: id, error: result.error });
                    }
                    // Check for stop on failure
                    if (this._config.stopOnFailure && !result.passed) {
                        logger.debug(`Stopping pipeline execution: scorer ${id} failed`);
                        break;
                    }
                }
                catch (error) {
                    const errorMessage = error instanceof Error ? error.message : String(error);
                    errors.push({ scorerId: id, error: errorMessage });
                    if (this._config.stopOnFailure) {
                        break;
                    }
                }
            }
        }
        // Aggregate results
        const aggregated = this._aggregateScores(results);
        const totalComputeTime = Date.now() - startTime;
        // Every required scorer must have produced a passing result; a required
        // scorer with no result (skipped or never reached) counts as failed.
        const requiredScorers = this._config.requiredScorers ?? [];
        const allRequiredPassed = requiredScorers.every((id) => {
            const result = results.find((r) => r.scorerId === id);
            return result?.passed ?? false;
        });
        const overallPassed = aggregated.normalizedScore >= (this._config.passThreshold ?? 0.7) &&
            allRequiredPassed;
        return {
            scores: results,
            overallScore: aggregated.score,
            aggregationMethod: this._config.aggregation?.method ?? "average",
            passed: overallPassed,
            totalComputeTime,
            timestamp: Date.now(),
            correlationId,
            pipelineConfig: this._config,
            executionOptions: options,
            errors,
            skippedScorers,
        };
    }
    /**
     * Reject option combinations that contradict each other.
     *
     * @throws The error built by `ErrorFactory.invalidConfiguration` when both
     *   `onlyScorers` and `skipScorers` are non-empty.
     */
    _validateExecutionOptions(options) {
        const hasOnlyScorers = !!options?.onlyScorers && options.onlyScorers.length > 0;
        const hasSkipScorers = !!options?.skipScorers && options.skipScorers.length > 0;
        if (hasOnlyScorers && hasSkipScorers) {
            throw ErrorFactory.invalidConfiguration("evaluation pipeline execution options", "Cannot specify both 'onlyScorers' and 'skipScorers' options", {
                onlyScorers: options?.onlyScorers,
                skipScorers: options?.skipScorers,
            });
        }
    }
    /**
     * Translate a scorer ID into the key used in the scorer map.
     *
     * A key that is present in the map wins; otherwise the ID is looked up as a
     * configured alias. Unknown IDs are returned unchanged.
     */
    _resolveScorerId(id) {
        if (this._scorers.has(id)) {
            return id;
        }
        return this._aliases.get(id) ?? id;
    }
    /**
     * Get scorers to run based on options.
     *
     * A non-empty `onlyScorers` restricts the run to those scorers; otherwise a
     * non-empty `skipScorers` excludes those scorers; otherwise everything runs.
     * Both lists may use canonical IDs or configured aliases.
     *
     * @returns Array of `[id, scorer]` entries in map insertion order.
     */
    _getScorersToRun(options) {
        const allScorers = Array.from(this._scorers.entries());
        const onlyScorers = options?.onlyScorers;
        const skipScorers = options?.skipScorers;
        if (onlyScorers && onlyScorers.length > 0) {
            const wanted = new Set(onlyScorers.map((id) => this._resolveScorerId(id)));
            return allScorers.filter(([id]) => wanted.has(id));
        }
        if (skipScorers && skipScorers.length > 0) {
            const unwanted = new Set(skipScorers.map((id) => this._resolveScorerId(id)));
            return allScorers.filter(([id]) => !unwanted.has(id));
        }
        return allScorers;
    }
    /**
     * Get list of skipped scorers (the complement of `_getScorersToRun`).
     *
     * @returns Map keys of the loaded scorers that will not run for these options.
     */
    _getSkippedScorers(options) {
        const onlyScorers = options?.onlyScorers;
        const skipScorers = options?.skipScorers;
        if (onlyScorers && onlyScorers.length > 0) {
            const wanted = new Set(onlyScorers.map((id) => this._resolveScorerId(id)));
            return Array.from(this._scorers.keys()).filter((id) => !wanted.has(id));
        }
        if (skipScorers && skipScorers.length > 0) {
            // Only report IDs that actually correspond to a loaded scorer.
            return skipScorers
                .map((id) => this._resolveScorerId(id))
                .filter((id) => this._scorers.has(id));
        }
        return [];
    }
    /**
     * Execute a single scorer with timeout.
     *
     * Timeout precedence: explicit `timeout` argument, then the scorer's own
     * `config.timeout`, then the pipeline's `timeout`, then 30000 ms.
     * Never rejects for scorer failures: any error (including a timeout) is
     * converted into a zero-score, non-passing result whose `error` field holds
     * the message.
     *
     * @param id - Map key of the scorer, used in messages and failure results.
     * @param scorer - Scorer instance; must expose `score(input)`.
     * @param input - Value forwarded to `scorer.score`.
     * @param timeout - Optional per-run timeout in milliseconds.
     */
    async _executeScorer(id, scorer, input, timeout) {
        // `config` is read defensively: scorers registered through addScorer()
        // are not guaranteed to carry one.
        const scorerTimeout = timeout ?? scorer.config?.timeout ?? this._config.timeout ?? 30000;
        try {
            // NOTE(review): withTimeout presumably rejects with the supplied error
            // once the deadline passes - confirm against utils/errorHandling.
            const result = await withTimeout(scorer.score(input), scorerTimeout, new Error(`Scorer ${id} timed out after ${scorerTimeout}ms`));
            return result;
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            return {
                scorerId: id,
                scorerName: scorer.metadata?.name,
                score: 0,
                normalizedScore: 0,
                scale: DEFAULT_SCORE_SCALE,
                reasoning: `Scorer execution failed: ${errorMessage}`,
                passed: false,
                threshold: scorer.config?.threshold ?? 0.7,
                computeTime: 0,
                error: errorMessage,
            };
        }
    }
    /**
     * Rescale a result's score to the default 0-MAX scale using its own scale info.
     *
     * Results without a `scale` are treated as already using the default scale.
     * A degenerate scale (`max === min`) yields 0 rather than dividing by zero.
     */
    _rescaleToDefault(result) {
        const scale = result.scale ?? DEFAULT_SCORE_SCALE;
        if (scale.max === scale.min) {
            return 0;
        }
        // Normalize to 0-1 then rescale to default
        // (assumes the default scale starts at 0 - only its max is applied here).
        const normalized = (result.score - scale.min) / (scale.max - scale.min);
        return normalized * DEFAULT_SCORE_SCALE.max;
    }
    /**
     * Aggregate scores based on configuration.
     *
     * Supported `aggregation.method` values: `"minimum"`, `"maximum"`,
     * `"weighted"`, `"custom"`; anything else (including `"average"` or no
     * aggregation config) averages the rescaled scores.
     *
     * @param results - Scorer results produced during one execution.
     * @returns `{ score, normalizedScore }` where `score` is on the default scale
     *   and `normalizedScore` is `score / DEFAULT_SCORE_SCALE.max`. Both are 0
     *   for an empty result list.
     */
    _aggregateScores(results) {
        if (results.length === 0) {
            return { score: 0, normalizedScore: 0 };
        }
        const aggregation = this._config.aggregation ?? { method: "average" };
        const weights = aggregation.weights ?? {};
        // Rescale all results to the common default scale before aggregation
        const rescaled = results.map((r) => this._rescaleToDefault(r));
        const average = () => rescaled.reduce((sum, s) => sum + s, 0) / rescaled.length;
        let score;
        switch (aggregation.method) {
            case "minimum":
                score = Math.min(...rescaled);
                break;
            case "maximum":
                score = Math.max(...rescaled);
                break;
            case "weighted": {
                // Reverse map from a scorer's own ID to the ID it was configured
                // under, so weights keyed by the configured alias are honoured.
                // The configured ID must be resolved first: the scorer map is
                // keyed by canonical ID, not by alias.
                const configuredKeyMap = new Map();
                for (const scorerDef of this._config.scorers) {
                    const mapKey = this._resolveScorerId(scorerDef.id);
                    const scorer = this._scorers.get(mapKey);
                    if (scorer) {
                        configuredKeyMap.set(scorer.metadata?.id ?? mapKey, scorerDef.id);
                    }
                }
                let totalWeight = 0;
                let weightedSum = 0;
                results.forEach((result, index) => {
                    const configuredKey = configuredKeyMap.get(result.scorerId);
                    // Unweighted scorers default to a weight of 1.
                    const weight = weights[result.scorerId] ??
                        (configuredKey ? weights[configuredKey] : undefined) ??
                        1.0;
                    totalWeight += weight;
                    weightedSum += rescaled[index] * weight;
                });
                score = totalWeight > 0 ? weightedSum / totalWeight : 0;
                break;
            }
            case "custom":
                if (aggregation.customFn) {
                    // The custom function receives the raw (not rescaled) results.
                    score = aggregation.customFn(results);
                    // Clamp custom output to valid range
                    score = Math.max(0, Math.min(DEFAULT_SCORE_SCALE.max, score));
                }
                else {
                    score = average();
                }
                break;
            case "average":
            default:
                score = average();
                break;
        }
        const normalizedScore = score / DEFAULT_SCORE_SCALE.max;
        return { score, normalizedScore };
    }
    /**
     * Add a scorer to the pipeline.
     *
     * The instance is stored under `id` (replacing any scorer already stored
     * there) and `id` is appended to the configured scorer list if absent.
     * Scorers added before `initialize()` are kept as-is during initialization.
     *
     * @param id - Key to register the scorer under.
     * @param scorer - Scorer instance.
     */
    addScorer(id, scorer) {
        this._scorers.set(id, scorer);
        // Update config
        if (!this._config.scorers.some((s) => s.id === id)) {
            // Copy-on-write: the array may still be the one the caller passed to
            // the constructor, which must not be mutated behind their back.
            this._config.scorers = [...this._config.scorers, { id }];
        }
    }
    /**
     * Remove a scorer from the pipeline.
     *
     * Accepts the canonical ID or a configured alias. On success the scorer is
     * also dropped from the configured scorer list and from `requiredScorers`.
     *
     * @returns `true` if a scorer was removed, `false` if none matched.
     */
    removeScorer(id) {
        const canonicalId = this._resolveScorerId(id);
        const removed = this._scorers.delete(canonicalId);
        if (removed) {
            // A configured ID refers to the removed scorer when it is the
            // canonical ID itself or an alias of it (and not the key of another
            // scorer that is still loaded).
            const refersToRemoved = (candidate) => candidate === canonicalId ||
                (!this._scorers.has(candidate) && this._aliases.get(candidate) === canonicalId);
            this._config.scorers = this._config.scorers.filter((s) => !refersToRemoved(s.id));
            this._config.requiredScorers = this._config.requiredScorers?.filter((requiredId) => !refersToRemoved(requiredId));
            for (const [alias, target] of this._aliases) {
                if (target === canonicalId) {
                    this._aliases.delete(alias);
                }
            }
        }
        return removed;
    }
    /**
     * Get a scorer by ID (canonical ID or configured alias).
     *
     * @returns The scorer instance, or `undefined` when none matches.
     */
    getScorer(id) {
        return this._scorers.get(this._resolveScorerId(id));
    }
    /**
     * Get all scorer IDs (the keys the scorers are stored under).
     */
    getScorerIds() {
        return Array.from(this._scorers.keys());
    }
    /**
     * Update pipeline configuration by shallow-merging `config` over the current
     * one. Already loaded scorers are not reloaded by this call.
     */
    configure(config) {
        this._config = { ...this._config, ...config };
    }
    /**
     * Create a clone of this pipeline.
     *
     * The configuration is copied (scorer definitions, aggregation weights and
     * `requiredScorers` get their own containers); scorer instances are not.
     *
     * @returns A new, uninitialized pipeline.
     */
    clone() {
        const clonedConfig = {
            ...this._config,
            scorers: this._config.scorers.map((s) => ({
                id: s.id,
                config: s.config ? { ...s.config } : undefined,
            })),
            aggregation: this._config.aggregation
                ? {
                    ...this._config.aggregation,
                    weights: this._config.aggregation.weights
                        ? { ...this._config.aggregation.weights }
                        : undefined,
                }
                : undefined,
            requiredScorers: this._config.requiredScorers
                ? [...this._config.requiredScorers]
                : undefined,
        };
        const cloned = new EvaluationPipeline(clonedConfig);
        // Do not copy scorer instances to avoid shared mutable state
        // (e.g., BaseLLMScorer.provider, initializationPromise).
        // The cloned pipeline will create fresh scorers on initialize().
        cloned._initialized = false;
        return cloned;
    }
}
/**
 * Factory helper: build an EvaluationPipeline from the given configuration.
 * The pipeline is returned as constructed; `initialize()` is not called here.
 *
 * @param config - Configuration object forwarded to the EvaluationPipeline constructor.
 * @returns The newly constructed pipeline.
 */
export function createPipeline(config) {
    const pipeline = new EvaluationPipeline(config);
    return pipeline;
}
/**
 * Factory helper: build an EvaluationPipeline and wait for its `initialize()`
 * call to finish before handing it back.
 *
 * @param config - Configuration object forwarded to the EvaluationPipeline constructor.
 * @returns Promise resolving to the pipeline once `initialize()` has completed;
 *   it rejects if construction or initialization throws.
 */
export async function createAndInitializePipeline(config) {
    const readyPipeline = new EvaluationPipeline(config);
    await readyPipeline.initialize();
    return readyPipeline;
}