@wavequery/conductor
Version:
Modular LLM orchestration framework
79 lines (78 loc) • 1.99 kB
TypeScript
export interface EvaluationMetric<T = any> {
name: string;
description: string;
compute(actual: T, expected: T): Promise<number>;
normalize?(value: number): number;
threshold?: number;
}
export interface TestCase<Input = any, Output = any> {
id: string;
name: string;
description?: string;
input: Input;
expectedOutput: Output;
metadata?: Record<string, any>;
timeout?: number;
}
export interface TestSuite<Input = any, Output = any> {
name: string;
description?: string;
testCases: TestCase<Input, Output>[];
setup?: () => Promise<void>;
teardown?: () => Promise<void>;
metadata?: Record<string, any>;
}
export interface EvaluationResult {
testCase: TestCase;
success: boolean;
actualOutput: any;
metrics: Record<string, MetricResult>;
error?: Error;
duration: number;
metadata?: Record<string, any>;
}
export interface EvaluationSummary {
totalTests: number;
passedTests: number;
failedTests: number;
averageMetrics: Record<string, number>;
duration: number;
timestamp: Date;
}
export interface MetricResult {
value: number;
details?: Record<string, any>;
}
export interface SQLTestCase {
id: string;
description: string;
question: string;
context: any;
query: string;
results?: any[];
metadata?: Record<string, any>;
}
export interface EvaluationOptions {
metrics: SQLMetric[];
timeout?: number;
parallel?: boolean;
}
export interface SQLMetric {
name: string;
description: string;
compute(generated: SQLTestCase, expected: SQLTestCase): Promise<MetricResult>;
}
export interface EvaluationReport {
summary: {
totalTests: number;
passedTests: number;
failedTests: number;
averageMetrics: Record<string, number>;
};
results: EvaluationResult[];
metadata: {
timestamp: Date;
duration: number;
options: EvaluationOptions;
};
}