@wavequery/conductor
Version:
Modular LLM orchestration framework
116 lines (114 loc) • 4.06 kB
JavaScript
import { Logger, LogLevel } from "@/utils/logger";
/** Scores at or above this value count as successful; lower scores get a recommendation. */
const PASS_THRESHOLD = 0.7;

/** Report options applied for every flag the caller does not specify. */
const DEFAULT_REPORT_OPTIONS = Object.freeze({
  format: "markdown",
  includeMetadata: true,
  includeTimestamp: true,
  includeSummary: true,
  includeRecommendations: true,
});

/**
 * Arithmetic mean of `score` across results.
 * @param {Array<{score: number}>} results
 * @returns {number|null} The mean, or null for an empty list (avoids 0/0 = NaN).
 */
function averageScore(results) {
  if (results.length === 0) {
    return null;
  }
  return results.reduce((sum, r) => sum + r.score, 0) / results.length;
}

/**
 * Normalizes a timestamp to an ISO-8601 string.
 * @param {Date|string|number|null|undefined} value - A Date, or anything `new Date()` accepts.
 * @returns {string|null} ISO string, or null when the value is missing or not a valid date.
 */
function toIsoTimestamp(value) {
  if (value == null) {
    return null;
  }
  const date = value instanceof Date ? value : new Date(value);
  // toISOString() throws a RangeError on an invalid Date, so check first.
  return Number.isNaN(date.getTime()) ? null : date.toISOString();
}

/**
 * Renders evaluation results as Markdown-style text or as JSON.
 *
 * Each result is read as an object with `metricName`, `score`, and optionally
 * `metadata`, `timestamp` and `evaluator`. Scores are multiplied by 100 for
 * display, so they are presumably fractions in the 0..1 range (not enforced).
 */
export class EvalReporter {
  constructor() {
    this.logger = new Logger({
      level: LogLevel.DEBUG,
      prefix: "EvalReporter",
    });
  }

  /**
   * Builds the full text report: optional summary, detailed results, and
   * optional recommendations, separated by blank lines.
   *
   * Caller options are merged over the defaults, so passing only some flags
   * leaves the remaining ones at their default (all sections enabled).
   * NOTE: `options.format` is accepted but not consulted here; the output is
   * always Markdown-style text.
   *
   * @param {Array<object>} results - Evaluation results to report on.
   * @param {object} [options] - Any subset of the default report options.
   * @returns {string} The assembled report.
   * @throws Re-throws any error raised while rendering, after logging it.
   */
  generateReport(results, options = {}) {
    const settings = { ...DEFAULT_REPORT_OPTIONS, ...options };
    try {
      const sections = [
        settings.includeSummary && this.generateSummary(results),
        this.generateDetailed(results, settings),
        settings.includeRecommendations && this.generateRecommendations(results),
      ].filter(Boolean);
      return sections.join("\n\n");
    } catch (error) {
      this.logger.error("Report generation failed:", error);
      throw error;
    }
  }

  /**
   * Promise-returning variant of {@link EvalReporter#generateReport}.
   * The work itself is synchronous; a rendering error becomes a rejection.
   *
   * @param {Array<object>} results
   * @param {object} [options]
   * @returns {Promise<string>}
   */
  async generateAsyncReport(results, options) {
    // An async function already converts a thrown error into a rejection.
    return this.generateReport(results, options);
  }

  /**
   * Renders the summary section: overall score, metric counts, and the time
   * the summary was generated.
   *
   * @param {Array<{score: number}>} results
   * @returns {string}
   */
  generateSummary(results) {
    const avgScore = averageScore(results);
    // With no results there is no meaningful average; show N/A rather than NaN%.
    const overall = avgScore === null ? "N/A" : `${(avgScore * 100).toFixed(2)}%`;
    const timestamp = new Date().toISOString();
    const successfulMetrics = results.filter((r) => r.score >= PASS_THRESHOLD).length;
    return `
# Evaluation Summary
- Overall Score: ${overall}
- Total Metrics: ${results.length}
- Successful Metrics: ${successfulMetrics}
- Failed Metrics: ${results.length - successfulMetrics}
- Timestamp: ${timestamp}
`.trim();
  }

  /**
   * Renders one sub-section per result with its score and, depending on the
   * flags, its metadata and timestamp. The evaluator line appears whenever the
   * result has a truthy `evaluator`.
   *
   * Unlike generateReport, the options object is used as given (no merging);
   * the defaults apply only when it is omitted entirely.
   *
   * @param {Array<object>} results
   * @param {{includeMetadata?: boolean, includeTimestamp?: boolean}} [options]
   * @returns {string}
   */
  generateDetailed(results, options = DEFAULT_REPORT_OPTIONS) {
    const entries = results.map((r) => {
      const metadataLines = options.includeMetadata ? this.formatMetadata(r.metadata) : "";
      const isoTimestamp = options.includeTimestamp ? toIsoTimestamp(r.timestamp) : null;
      // A missing or unparseable timestamp leaves the line empty instead of throwing.
      const timestampLine = isoTimestamp === null ? "" : `- Timestamp: ${isoTimestamp}`;
      const evaluatorLine = r.evaluator ? `- Evaluator: ${r.evaluator}` : "";
      return `
## ${r.metricName}
- Score: ${(r.score * 100).toFixed(2)}%
${metadataLines}
${timestampLine}
${evaluatorLine}
`;
    });
    return `
# Detailed Results
${entries.join("\n")}
`.trim();
  }

  /**
   * Renders metadata as one `- key: <JSON value>` bullet per own enumerable entry.
   *
   * @param {object|null|undefined} metadata
   * @returns {string} Newline-joined bullets; empty string when there is no metadata.
   */
  formatMetadata(metadata) {
    if (metadata == null) {
      return "";
    }
    return Object.entries(metadata)
      .map(([key, value]) => `- ${key}: ${JSON.stringify(value)}`)
      .join("\n");
  }

  /**
   * Renders the recommendations section, listing every result whose score is
   * below the pass threshold together with a suggested action.
   *
   * @param {Array<object>} results
   * @returns {string}
   */
  generateRecommendations(results) {
    const lowScores = results.filter((r) => r.score < PASS_THRESHOLD);
    if (lowScores.length === 0) {
      return "# Recommendations\nAll metrics are performing well! No immediate improvements needed.";
    }
    return `
# Recommendations
${lowScores
  .map((r) => `
- Improve ${r.metricName}
- Current score: ${(r.score * 100).toFixed(2)}%
- Action needed: ${this.getRecommendation(r)}
`)
  .join("\n")}
`.trim();
  }

  /**
   * Maps a result's metric name to a canned improvement suggestion.
   *
   * @param {{metricName: string}} result
   * @returns {string} A metric-specific suggestion, or a generic one for unknown metrics.
   */
  getRecommendation(result) {
    switch (result.metricName) {
      case "responseTime":
        return "Consider optimizing performance or increasing compute resources";
      case "tokenUsage":
        return "Review prompt engineering to reduce token consumption";
      case "accuracy":
        return "Review model configuration and input preprocessing";
      default:
        return "Review implementation and configuration";
    }
  }

  /**
   * Serializes the results plus a small summary as pretty-printed JSON.
   *
   * `summary.averageScore` is null when there are no results. Each result's
   * `timestamp` is an ISO-8601 string, or null when missing or invalid.
   *
   * @param {Array<object>} results
   * @returns {string} JSON text indented with two spaces.
   */
  exportToJSON(results) {
    return JSON.stringify(
      {
        summary: {
          averageScore: averageScore(results),
          totalMetrics: results.length,
          timestamp: new Date().toISOString(),
        },
        results: results.map((r) => ({
          ...r,
          timestamp: toIsoTimestamp(r.timestamp),
        })),
      },
      null,
      2,
    );
  }
}
//# sourceMappingURL=eval-reporter.js.map