ai-functions
Version:
Core AI primitives for building intelligent applications
73 lines • 2.2 kB
TypeScript
/**
* Simple eval runner for AI Functions
*
* Runs evals across multiple models and collects results.
* Does not depend on evalite - uses our own infrastructure.
*/
import { generateObject, generateText } from '../generate.js';
import { schema } from '../schema.js';
import { type EvalModel, type ModelTier } from './models.js';
/**
* Output function type for eval progress reporting.
*
* A callback that receives a single progress message as a string; its
* return value is ignored (`void`). `RunEvalOptions.output` accepts a
* function of this type as the custom progress reporter.
*/
export type EvalOutputFn = (message: string) => void;
/**
 * One named example in an eval suite: an input plus an optional reference value.
 *
 * @typeParam TInput - Type of the value held in `input` (defaults to `unknown`).
 * @typeParam TExpected - Type of the optional `expected` value (defaults to `unknown`).
 */
export interface EvalCase<TInput = unknown, TExpected = unknown> {
  /** Label identifying this case. */
  name: string;

  /** The input value for this case. */
  input: TInput;

  /**
   * Optional reference value for this case.
   * NOTE(review): presumably what scorers compare the output against — confirm in the runner.
   */
  expected?: TExpected;
}
/**
 * A single named score, with an optional description and free-form metadata.
 */
export interface EvalScore {
  /** Name of the score. */
  name: string;

  /**
   * Numeric score value.
   * NOTE(review): the valid range is not stated in this declaration — confirm with the runner.
   */
  score: number;

  /** Optional human-readable description. */
  description?: string;

  /** Optional extra data; typed `unknown`, so consumers must narrow before use. */
  metadata?: unknown;
}
/**
 * Outcome record pairing one eval case with one model.
 *
 * @typeParam TOutput - Type of the task output (defaults to `unknown`).
 */
export interface EvalResult<TOutput = unknown> {
  /** The model this result belongs to. */
  model: EvalModel;

  /** The eval case this result belongs to (uses `EvalCase`'s default type arguments). */
  case: EvalCase;

  /** The output from the task. Will be null if an error occurred. */
  output: TOutput | null;

  /** Scores recorded for this result. */
  scores: EvalScore[];

  /**
   * Latency in milliseconds (per the field name).
   * NOTE(review): which span is timed is not visible in this declaration.
   */
  latencyMs: number;

  /**
   * Cost figure for this result.
   * NOTE(review): unit/currency is not stated here — confirm with the runner.
   */
  cost: number;

  /**
   * Optional error message.
   * NOTE(review): presumably set when `output` is null — confirm with the runner.
   */
  error?: string;
}
/**
 * Aggregate report returned for an eval run: the individual results plus
 * overall and per-model averages and totals.
 */
export interface EvalSummary {
  /**
   * Name of the eval.
   * NOTE(review): presumably copied from the run options' `name` — confirm.
   */
  name: string;

  /** The individual results collected during the run. */
  results: EvalResult[];

  /** Average score for the run as a whole. */
  avgScore: number;

  /**
   * Per-model aggregates: an average score and a count for each string key.
   * NOTE(review): which model property is used as the key, and what exactly
   * `count` counts, are not visible in this declaration.
   */
  byModel: Record<string, { avgScore: number; count: number }>;

  /** Total cost; unit is not stated in this declaration. */
  totalCost: number;

  /**
   * Total time.
   * NOTE(review): unit is not stated here (possibly milliseconds, matching
   * `EvalResult.latencyMs`) — confirm.
   */
  totalTime: number;
}
/**
 * Options accepted by `runEval`.
 *
 * @typeParam TInput - Type of each case's `input`, as passed to `task` and to scorers.
 * @typeParam TOutput - Type `task` resolves with and scorers receive as `output`.
 * @typeParam TExpected - Type of each case's optional `expected` value.
 */
export interface RunEvalOptions<TInput, TOutput, TExpected> {
  /** Name of the eval. */
  name: string;

  /** The cases to run. */
  cases: EvalCase<TInput, TExpected>[];

  /** Async function under test: receives a case input and a model, resolves with the output. */
  task: (input: TInput, model: EvalModel) => Promise<TOutput>;

  /**
   * Scoring functions. Each has a name, an optional description, and a
   * `scorer` that receives the input, the task output and the optional
   * expected value, and returns a number either directly or via a promise.
   */
  scorers: {
    name: string;
    description?: string;
    scorer: (args: { input: TInput; output: TOutput; expected?: TExpected }) => number | Promise<number>;
  }[];

  /**
   * Optional explicit list of models.
   * NOTE(review): how this interacts with `tiers` and `providers` is not visible here.
   */
  models?: EvalModel[];

  /** Optional list of model tiers. NOTE(review): presumably a filter on which models run — confirm. */
  tiers?: ModelTier[];

  /** Optional list of provider names. NOTE(review): presumably a filter on which models run — confirm. */
  providers?: string[];

  /** Optional concurrency setting; the default is not stated in this declaration. */
  concurrency?: number;

  /** Custom output function for progress reporting (defaults to logger.info) */
  output?: EvalOutputFn;

  /** Whether to suppress progress output (defaults to false) */
  quiet?: boolean;
}
/**
* Run an eval suite across models.
*
* Only the declaration is visible here, so the notes below are read off the
* signature and the option types rather than the implementation.
*
* @typeParam TInput - Type of each case's `input`, passed to `options.task`.
* @typeParam TOutput - Type `options.task` resolves with and scorers receive as `output`.
* @typeParam TExpected - Type of each case's optional `expected` value.
* @param options - Suite name, cases, task and scorers, plus optional model
* selection, concurrency and progress-reporting settings.
* @returns A promise that resolves with an `EvalSummary`.
*/
export declare function runEval<TInput, TOutput, TExpected>(options: RunEvalOptions<TInput, TOutput, TExpected>): Promise<EvalSummary>;
export { generateObject, generateText, schema };
//# sourceMappingURL=runner.d.ts.map