@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications…
378 lines • 13.5 kB
JavaScript
/**
* @file EvaluationAggregator - Aggregates and analyzes evaluation results.
* Provides statistical analysis, trend detection, and summary generation.
*/
import { evaluationErrors } from "./errors/EvaluationError.js";
/**
 * EvaluationAggregator - Aggregates evaluation results and provides analytics.
 * Supports statistical analysis, trend detection, and quality monitoring.
 *
 * Evaluations are kept in insertion order; the sequence-trend analysis relies
 * on that order rather than on timestamps.
 *
 * @example
 * ```typescript
 * const aggregator = new EvaluationAggregator();
 *
 * // Add evaluations
 * aggregator.addEvaluation(evaluation1);
 * aggregator.addEvaluation(evaluation2);
 *
 * // Get aggregation
 * const result = aggregator.aggregate({ threshold: 7 });
 * console.log(`Average score: ${result.statistics.mean}`);
 * console.log(`Passing rate: ${result.passingRate}%`);
 *
 * // Get trend analysis
 * const trend = aggregator.analyzeSequenceTrend();
 * console.log(`Quality is ${trend.direction}`);
 * ```
 */
export class EvaluationAggregator {
  /** Collected evaluations, in the order they were added. */
  evaluations = [];

  /**
   * Adds an evaluation to the aggregator.
   *
   * @param evaluation - The evaluation data to add
   */
  addEvaluation(evaluation) {
    this.evaluations.push(evaluation);
  }

  /**
   * Adds multiple evaluations to the aggregator.
   *
   * @param evaluations - Array (or other iterable) of evaluation data to add
   */
  addEvaluations(evaluations) {
    // Deliberately not `push(...evaluations)`: spreading a very large array
    // into call arguments can exceed the engine's argument limit and throw a
    // RangeError.
    const items = Array.isArray(evaluations)
      ? evaluations
      : Array.from(evaluations);
    // Capture the length up front so passing the aggregator's own array
    // appends one copy instead of looping forever.
    const count = items.length;
    for (let i = 0; i < count; i++) {
      this.evaluations.push(items[i]);
    }
  }

  /**
   * Clears all evaluations from the aggregator.
   */
  clear() {
    this.evaluations = [];
  }

  /**
   * Gets the current number of evaluations.
   *
   * @returns Number of stored evaluations
   */
  getCount() {
    return this.evaluations.length;
  }

  /**
   * Gets all evaluations.
   *
   * @returns A shallow copy of the stored evaluations, in insertion order
   */
  getEvaluations() {
    return [...this.evaluations];
  }

  /**
   * Aggregates all evaluations and returns comprehensive statistics.
   *
   * @param options - Aggregation options
   * @param options.threshold - Minimum `overall` score that counts as passing
   *   (default: 7). An explicit `0` is honored.
   * @returns Comprehensive aggregation result. `sequenceTrend` is only
   *   populated when at least 3 evaluations are present.
   * @throws The error built by `evaluationErrors.create("AGGREGATION_ERROR", ...)`
   *   when no evaluations have been added.
   */
  aggregate(options = {}) {
    // `??` rather than `||` so that a threshold of 0 is not replaced by the default.
    const threshold = options.threshold ?? 7;
    if (this.evaluations.length === 0) {
      throw evaluationErrors.create(
        "AGGREGATION_ERROR",
        "Cannot aggregate: no evaluations available",
        { retryable: false },
      );
    }
    const overallScores = this.evaluations.map((e) => e.overall);
    const relevanceScores = this.evaluations.map((e) => e.relevance);
    const accuracyScores = this.evaluations.map((e) => e.accuracy);
    const completenessScores = this.evaluations.map((e) => e.completeness);
    const statistics = this.calculateStatistics(overallScores);
    const distribution = this.calculateDistribution(overallScores);
    const dimensions = this.analyzeDimensions(
      relevanceScores,
      accuracyScores,
      completenessScores,
      overallScores,
    );
    const alerts = this.summarizeAlerts();
    const passingCount = this.evaluations.filter(
      (e) => e.overall >= threshold,
    ).length;
    const passingRate = (passingCount / this.evaluations.length) * 100;
    const avgEvaluationTime =
      this.evaluations.reduce((sum, e) => sum + e.evaluationTime, 0) /
      this.evaluations.length;
    // Distinct evaluation models, in first-seen order.
    const evaluationModels = Array.from(
      new Set(this.evaluations.map((e) => e.evaluationModel)),
    );
    return {
      count: this.evaluations.length,
      statistics,
      distribution,
      dimensions,
      sequenceTrend:
        this.evaluations.length >= 3 ? this.analyzeSequenceTrend() : undefined,
      alerts,
      passingRate,
      avgEvaluationTime,
      metadata: {
        aggregatedAt: new Date().toISOString(),
        threshold,
        evaluationModels,
      },
    };
  }

  /**
   * Calculates statistical summary for a set of scores.
   *
   * Variance and standard deviation are the population forms (divided by
   * `n`, not `n - 1`). Percentiles use linear interpolation.
   *
   * @param scores - Array of scores
   * @returns Statistical summary; every field is 0 for an empty input
   */
  calculateStatistics(scores) {
    if (scores.length === 0) {
      return {
        min: 0,
        max: 0,
        mean: 0,
        median: 0,
        stdDev: 0,
        variance: 0,
        p25: 0,
        p75: 0,
        p90: 0,
        p95: 0,
      };
    }
    // Sort a copy so the caller's array is left untouched.
    const sorted = [...scores].sort((a, b) => a - b);
    const n = sorted.length;
    const sum = scores.reduce((a, b) => a + b, 0);
    const mean = sum / n;
    const squaredDiffs = scores.map((s) => Math.pow(s - mean, 2));
    const variance = squaredDiffs.reduce((a, b) => a + b, 0) / n;
    const stdDev = Math.sqrt(variance);
    return {
      min: sorted[0],
      max: sorted[n - 1],
      mean,
      median: this.percentile(sorted, 50),
      stdDev,
      variance,
      p25: this.percentile(sorted, 25),
      p75: this.percentile(sorted, 75),
      p90: this.percentile(sorted, 90),
      p95: this.percentile(sorted, 95),
    };
  }

  /**
   * Calculates the distribution of scores across quality ranges.
   *
   * Buckets are contiguous so fractional scores are counted too:
   * poor [1, 4), belowAverage [4, 6), average [6, 8), good [8, 10),
   * excellent [10, ∞). Scores below 1 (and non-numeric values) are not
   * counted in any bucket.
   *
   * @param scores - Array of scores
   * @returns Score distribution
   */
  calculateDistribution(scores) {
    const distribution = {
      poor: 0,
      belowAverage: 0,
      average: 0,
      good: 0,
      excellent: 0,
    };
    for (const score of scores) {
      // Negated comparison also skips NaN/undefined, which fail every comparison.
      if (!(score >= 1)) {
        continue;
      }
      if (score < 4) {
        distribution.poor += 1;
      } else if (score < 6) {
        distribution.belowAverage += 1;
      } else if (score < 8) {
        distribution.average += 1;
      } else if (score < 10) {
        distribution.good += 1;
      } else {
        distribution.excellent += 1;
      }
    }
    return distribution;
  }

  /**
   * Analyzes sequence-based trends in evaluation scores (based on insertion order, not time).
   *
   * Fits a least-squares line to `overall` scores against their insertion
   * index. The direction is "stable" when the absolute slope is below 0.05,
   * otherwise "improving" or "declining" by the slope's sign.
   *
   * @param windowSize - Moving average window size (default: 5)
   * @returns Trend analysis; with fewer than 2 evaluations a "stable" result
   *   with zero slope is returned
   */
  analyzeSequenceTrend(windowSize = 5) {
    const scores = this.evaluations.map((e) => e.overall);
    if (scores.length < 2) {
      return {
        direction: "stable",
        slope: 0,
        rSquared: 0,
        percentChange: 0,
        movingAverage: scores[0] || 0,
      };
    }
    // Calculate linear regression (x = insertion index, y = score)
    const n = scores.length;
    const indices = scores.map((_, i) => i);
    const sumX = indices.reduce((a, b) => a + b, 0);
    const sumY = scores.reduce((a, b) => a + b, 0);
    const sumXY = indices.reduce((sum, x, i) => sum + x * scores[i], 0);
    const sumXX = indices.reduce((sum, x) => sum + x * x, 0);
    const slope = (n * sumXY - sumX * sumY) / (n * sumXX - sumX * sumX);
    const intercept = (sumY - slope * sumX) / n;
    // Calculate R-squared
    const yMean = sumY / n;
    const ssTotal = scores.reduce((sum, y) => sum + Math.pow(y - yMean, 2), 0);
    const ssResidual = scores.reduce((sum, y, i) => {
      const predicted = slope * i + intercept;
      return sum + Math.pow(y - predicted, 2);
    }, 0);
    // All-equal scores have no variance to explain; report 0 instead of dividing by zero.
    const rSquared = ssTotal === 0 ? 0 : 1 - ssResidual / ssTotal;
    // Calculate moving average over the most recent `windowSize` scores
    const window = Math.min(windowSize, scores.length);
    const recentScores = scores.slice(-window);
    const movingAverage =
      recentScores.reduce((a, b) => a + b, 0) / recentScores.length;
    // Calculate percent change from first to last score (0 when the first score is 0)
    const percentChange =
      scores[0] !== 0
        ? ((scores[scores.length - 1] - scores[0]) / scores[0]) * 100
        : 0;
    // Determine direction
    let direction;
    if (Math.abs(slope) < 0.05) {
      direction = "stable";
    } else if (slope > 0) {
      direction = "improving";
    } else {
      direction = "declining";
    }
    return {
      direction,
      slope,
      rSquared,
      percentChange,
      movingAverage,
    };
  }

  /**
   * Analyzes each evaluation dimension separately.
   *
   * @param relevance - Relevance scores
   * @param accuracy - Accuracy scores
   * @param completeness - Completeness scores
   * @param overall - Overall scores
   * @returns Per-dimension statistics plus pairwise Pearson correlations
   *   between relevance, accuracy and completeness
   */
  analyzeDimensions(relevance, accuracy, completeness, overall) {
    return {
      relevance: this.calculateStatistics(relevance),
      accuracy: this.calculateStatistics(accuracy),
      completeness: this.calculateStatistics(completeness),
      overall: this.calculateStatistics(overall),
      correlations: {
        relevanceAccuracy: this.correlation(relevance, accuracy),
        relevanceCompleteness: this.correlation(relevance, completeness),
        accuracyCompleteness: this.correlation(accuracy, completeness),
      },
    };
  }

  /**
   * Summarizes alert information from evaluations.
   *
   * Only "high" and "medium" severities count toward `total` and `alertRate`.
   *
   * @returns Alert summary; `alertRate` is a percentage and is 0 when there
   *   are no evaluations
   */
  summarizeAlerts() {
    const highAlerts = this.evaluations.filter(
      (e) => e.alertSeverity === "high",
    ).length;
    const mediumAlerts = this.evaluations.filter(
      (e) => e.alertSeverity === "medium",
    ).length;
    const offTopicCount = this.evaluations.filter((e) => e.isOffTopic).length;
    const total = highAlerts + mediumAlerts;
    const count = this.evaluations.length;
    return {
      total,
      high: highAlerts,
      medium: mediumAlerts,
      offTopic: offTopicCount,
      // Guard the empty case so callers get 0 rather than NaN (0 / 0).
      alertRate: count === 0 ? 0 : (total / count) * 100,
    };
  }

  /**
   * Calculates a specific percentile from sorted data using linear
   * interpolation between the two nearest ranks.
   *
   * @param sorted - Sorted (ascending) array of numbers
   * @param p - Percentile (0-100); values outside that range are clamped
   * @returns The value at the percentile, or 0 for an empty array
   */
  percentile(sorted, p) {
    if (sorted.length === 0) {
      return 0;
    }
    if (sorted.length === 1) {
      return sorted[0];
    }
    // Clamp so an out-of-range percentile cannot index past either end.
    const clamped = Math.min(100, Math.max(0, p));
    const index = (clamped / 100) * (sorted.length - 1);
    const lower = Math.floor(index);
    const upper = Math.ceil(index);
    if (lower === upper) {
      return sorted[lower];
    }
    const fraction = index - lower;
    return sorted[lower] * (1 - fraction) + sorted[upper] * fraction;
  }

  /**
   * Calculates Pearson correlation between two arrays.
   *
   * @param x - First array
   * @param y - Second array
   * @returns Correlation coefficient (-1 to 1); 0 when the arrays are empty,
   *   differ in length, or either one has no variance
   */
  correlation(x, y) {
    if (x.length !== y.length || x.length === 0) {
      return 0;
    }
    const n = x.length;
    const sumX = x.reduce((a, b) => a + b, 0);
    const sumY = y.reduce((a, b) => a + b, 0);
    const sumXY = x.reduce((sum, xi, i) => sum + xi * y[i], 0);
    const sumXX = x.reduce((sum, xi) => sum + xi * xi, 0);
    const sumYY = y.reduce((sum, yi) => sum + yi * yi, 0);
    const numerator = n * sumXY - sumX * sumY;
    const spreadX = n * sumXX - sumX * sumX;
    const spreadY = n * sumYY - sumY * sumY;
    // A constant series has zero spread; floating-point cancellation can even
    // make it marginally negative, which would turn the square root into NaN.
    if (spreadX <= 0 || spreadY <= 0) {
      return 0;
    }
    const coefficient = numerator / Math.sqrt(spreadX * spreadY);
    // Rounding can push a perfect correlation slightly past +/-1.
    return Math.max(-1, Math.min(1, coefficient));
  }

  /**
   * Gets evaluations that failed to meet the threshold.
   *
   * @param threshold - The passing threshold (default: 7)
   * @returns Array of evaluations whose `overall` score is below the threshold
   */
  getFailingEvaluations(threshold = 7) {
    return this.evaluations.filter((e) => e.overall < threshold);
  }

  /**
   * Gets evaluations with high severity alerts.
   *
   * @returns Array of high-alert evaluations
   */
  getHighAlertEvaluations() {
    return this.evaluations.filter((e) => e.alertSeverity === "high");
  }

  /**
   * Gets evaluations marked as off-topic.
   *
   * @returns Array of off-topic evaluations
   */
  getOffTopicEvaluations() {
    return this.evaluations.filter((e) => e.isOffTopic);
  }

  /**
   * Gets the top N performing evaluations.
   *
   * @param n - Number of evaluations to return (default: 5)
   * @returns Array of top evaluations, highest `overall` first
   */
  getTopEvaluations(n = 5) {
    // Sort a copy so insertion order (used by trend analysis) is preserved.
    return [...this.evaluations]
      .sort((a, b) => b.overall - a.overall)
      .slice(0, n);
  }

  /**
   * Gets the bottom N performing evaluations.
   *
   * @param n - Number of evaluations to return (default: 5)
   * @returns Array of bottom evaluations, lowest `overall` first
   */
  getBottomEvaluations(n = 5) {
    // Sort a copy so insertion order (used by trend analysis) is preserved.
    return [...this.evaluations]
      .sort((a, b) => a.overall - b.overall)
      .slice(0, n);
  }

  /**
   * Generates a text summary of the aggregation.
   *
   * @param threshold - The passing threshold (default: 7)
   * @returns Human-readable summary, or a fixed message when there are no
   *   evaluations
   */
  generateSummary(threshold = 7) {
    if (this.evaluations.length === 0) {
      return "No evaluations to summarize.";
    }
    const result = this.aggregate({ threshold });
    const trend = result.sequenceTrend;
    let summary = `Evaluation Summary (${result.count} evaluations):\n`;
    summary += `- Average Score: ${result.statistics.mean.toFixed(2)}/10\n`;
    summary += `- Passing Rate: ${result.passingRate.toFixed(1)}%\n`;
    summary += `- Score Range: ${result.statistics.min} - ${result.statistics.max}\n`;
    summary += `- Alert Rate: ${result.alerts.alertRate.toFixed(1)}%\n`;
    // The trend is only present when aggregate() had at least 3 evaluations.
    if (trend) {
      summary += `- Quality Trend: ${trend.direction} (slope: ${trend.slope.toFixed(3)})\n`;
    }
    if (result.alerts.high > 0) {
      summary += `\nWarning: ${result.alerts.high} high-severity alerts detected.\n`;
    }
    return summary;
  }
}
//# sourceMappingURL=EvaluationAggregator.js.map