@vfarcic/dot-ai
Version:
AI-powered development productivity platform that enhances software development workflows through intelligent automation and AI-driven assistance
96 lines • 3.44 kB
TypeScript
/**
* Base Comparative Evaluator
*
* Shared functionality for comparing multiple AI models across scenarios
* Eliminates code duplication between remediation, recommendation, and capability evaluators
*/
import { EvaluationScore } from './base.js';
import { VercelProvider } from '../../core/providers/vercel-provider';
import { DatasetAnalyzer, ComparisonScenario } from '../dataset-analyzer.js';
import { type EvaluationMetadata } from '../metadata-loader.js';
/**
 * Structured outcome of comparing several models on one scenario.
 *
 * Field names are snake_case, unlike the camelCase used by
 * `ComparativeEvaluationScore`.
 * NOTE(review): presumably this mirrors the raw JSON returned by the
 * evaluator model — confirm against the implementation.
 */
export interface ComparativeEvaluationResult {
  /** Free-text summary of the scenario that was evaluated. */
  scenario_summary: string;
  /** Identifiers of the models included in this comparison. */
  models_compared: string[];
  /**
   * Per-entry analysis, keyed by string.
   * NOTE(review): keys are presumably model identifiers — confirm.
   * Every individual `*_score` is optional; only `weighted_total`,
   * `strengths` and `weaknesses` are required.
   */
  comparative_analysis: Record<
    string,
    {
      quality_score?: number;
      efficiency_score?: number;
      performance_score?: number;
      communication_score?: number;
      accuracy_score?: number;
      completeness_score?: number;
      clarity_score?: number;
      consistency_score?: number;
      weighted_total: number;
      strengths: string;
      weaknesses: string;
    }
  >;
  /**
   * Ranked list of models with their scores.
   * Both `rationale` and `reasoning` are optional free-text fields; the
   * declaration does not say which one a given producer fills in.
   */
  ranking: {
    rank: number;
    model: string;
    score: number;
    rationale?: string;
    reasoning?: string;
  }[];
  /** Optional free-text observations spanning the whole comparison. */
  overall_insights?: string;
}
/**
 * An `EvaluationScore` extended with the outcome of a multi-model comparison.
 *
 * This is the type returned by `BaseComparativeEvaluator.evaluateScenario`
 * and, as an array, by `evaluateAllScenarios`.
 */
export interface ComparativeEvaluationScore extends EvaluationScore {
  /** Rank, model identifier and score for each ranked model. */
  modelRankings: {
    rank: number;
    model: string;
    score: number;
  }[];
  /**
   * Identifier of the winning model.
   * NOTE(review): presumably the rank-1 entry of `modelRankings` — confirm.
   */
  bestModel: string;
  /**
   * Number of models.
   * NOTE(review): presumably the count of models compared in the scenario — confirm.
   */
  modelCount: number;
}
/**
* Abstract base class for evaluators that compare multiple AI models across scenarios.
*
* Concrete subclasses must provide `name`, `description`, `promptFileName` and
* `toolName`, and must implement `getEvaluationPhases()`.
*
* This is an ambient declaration only: method bodies live in the compiled
* implementation and are not visible here.
*/
export declare abstract class BaseComparativeEvaluator {
/** Evaluator name, supplied by the subclass. */
abstract readonly name: string;
/** Evaluator description, supplied by the subclass. */
abstract readonly description: string;
/**
* Prompt file name, supplied by the subclass.
* NOTE(review): presumably the file `initializePrompt()` loads into `promptTemplate` — confirm.
*/
protected abstract readonly promptFileName: string;
/**
* Tool name, supplied by the subclass.
* NOTE(review): presumably selects which tool's datasets are evaluated — confirm.
*/
protected abstract readonly toolName: string;
/** Provider instance held by the evaluator (how it is used is not visible in this declaration). */
protected evaluatorModel: VercelProvider;
/** Dataset analyzer held by the evaluator. */
protected datasetAnalyzer: DatasetAnalyzer;
/** Prompt template text. */
protected promptTemplate: string;
/** Evaluation metadata held by the evaluator. */
protected metadata: EvaluationMetadata;
/**
* @param datasetDir Optional dataset directory. The value used when it is
* omitted is not visible in this declaration.
*/
constructor(datasetDir?: string);
/**
* Initialize the evaluator - must be called by subclass constructor
*/
protected initializePrompt(): void;
/**
* Evaluate all available models for scenarios
* This method finds all scenarios with multiple model responses and evaluates them comparatively
*
* @returns A promise resolving to an array of comparative scores
* (presumably one per evaluated scenario — confirm).
*/
evaluateAllScenarios(): Promise<ComparativeEvaluationScore[]>;
/**
* Conduct final assessment across all scenarios to determine overall winner
*
* @param scenarioResults Per-scenario comparative scores to assess.
* @returns A promise resolving to a record with string keys and `unknown` values;
* callers must narrow the values before use.
*/
conductFinalAssessment(scenarioResults: ComparativeEvaluationScore[]): Promise<Record<string, unknown>>;
/**
* Evaluate a single scenario comparing all available models
*
* @param scenario The comparison scenario to evaluate.
* @returns A promise resolving to the comparative score for that scenario.
*/
evaluateScenario(scenario: ComparisonScenario): Promise<ComparativeEvaluationScore>;
/**
* Build the evaluation prompt - can be overridden by subclasses for custom behavior
*
* @param scenario The comparison scenario the prompt is built for.
* @param modelResponsesText Text containing the model responses.
* @param modelList Text listing the models.
* @returns The prompt string.
*/
protected buildEvaluationPrompt(scenario: ComparisonScenario, modelResponsesText: string, modelList: string): string;
/**
* Get statistics about available datasets
*
* @returns Counts of datasets and of scenarios with multiple models, plus the
* lists of available models and interaction types.
*/
getDatasetStats(): {
totalDatasets: number;
availableModels: string[];
scenariosWithMultipleModels: number;
interactionTypes: string[];
};
/**
* Get detailed breakdown of evaluation phases available
* Must be implemented by subclasses to provide domain-specific phase descriptions
*
* @returns One entry per phase with its description, available models and scenario count.
*/
abstract getEvaluationPhases(): {
phase: string;
description: string;
availableModels: string[];
scenarioCount: number;
}[];
}
//# sourceMappingURL=base-comparative.d.ts.map