UNPKG

ai-functions

Version:

Core AI primitives for building intelligent applications

73 lines 2.2 kB
/**
 * Simple eval runner for AI Functions
 *
 * Runs evals across multiple models and collects results.
 * Does not depend on evalite - uses our own infrastructure.
 *
 * This is a declaration file: it describes the public types of the runner
 * and the signature of `runEval`; the implementation lives elsewhere.
 */
import { generateObject, generateText } from '../generate.js';
import { schema } from '../schema.js';
import { type EvalModel, type ModelTier } from './models.js';

/**
 * Output function type for eval progress reporting.
 *
 * Receives a single message string and returns nothing.
 */
export type EvalOutputFn = (message: string) => void;

/**
 * A single named eval case: an input plus an optional expected value.
 *
 * @typeParam TInput - Type of the value stored in `input`.
 * @typeParam TExpected - Type of the optional `expected` value.
 */
export interface EvalCase<TInput = unknown, TExpected = unknown> {
  /** Name of the case. */
  name: string;
  /** Input value for the case; has the type the `task` callback in `RunEvalOptions` accepts. */
  input: TInput;
  /** Optional expected value; has the type of the `expected` field passed to scorers. */
  expected?: TExpected;
}

/**
 * A named numeric score, with optional description and metadata.
 *
 * NOTE(review): the valid range of `score` (e.g. 0..1) is not stated by this
 * declaration — confirm against the implementation.
 */
export interface EvalScore {
  /** Name of the score (presumably the name of the scorer that produced it — confirm). */
  name: string;
  /** Numeric score value. */
  score: number;
  /** Optional human-readable description. */
  description?: string;
  /** Optional metadata of unspecified shape; must be narrowed before use. */
  metadata?: unknown;
}

/**
 * Result record pairing a model and a case with the output, scores,
 * latency, cost, and an optional error string.
 *
 * @typeParam TOutput - Type of the task output.
 */
export interface EvalResult<TOutput = unknown> {
  /** The model this result belongs to. */
  model: EvalModel;
  /** The case this result belongs to (typed with `EvalCase`'s default `unknown` parameters). */
  case: EvalCase;
  /** The output from the task. Will be null if an error occurred. */
  output: TOutput | null;
  /** Scores recorded for this result. */
  scores: EvalScore[];
  /** Latency; the name indicates milliseconds. */
  latencyMs: number;
  /** Cost figure; the unit/currency is not specified by this declaration. */
  cost: number;
  /** Optional error string (presumably set when the task failed — see `output`). */
  error?: string;
}

/**
 * Aggregate summary returned by `runEval`.
 */
export interface EvalSummary {
  /** Name of the eval. */
  name: string;
  /** Individual result records. */
  results: EvalResult[];
  /** Average score (how it is averaged is not shown in this declaration). */
  avgScore: number;
  /**
   * Per-key average score and count.
   * NOTE(review): keys are presumably model identifiers, per the property name — confirm.
   */
  byModel: Record<string, {
    avgScore: number;
    count: number;
  }>;
  /** Total cost; unit not specified by this declaration. */
  totalCost: number;
  /** Total time; unit not specified by this declaration (possibly ms, like `latencyMs` — confirm). */
  totalTime: number;
}

/**
 * Options accepted by `runEval`.
 *
 * @typeParam TInput - Input type passed to `task` and to scorers.
 * @typeParam TOutput - Output type produced by `task` and passed to scorers.
 * @typeParam TExpected - Type of the optional expected value passed to scorers.
 */
export interface RunEvalOptions<TInput, TOutput, TExpected> {
  /** Name of the eval. */
  name: string;
  /** Cases to evaluate. */
  cases: EvalCase<TInput, TExpected>[];
  /** Async task under evaluation: receives a case input and a model, resolves to the output. */
  task: (input: TInput, model: EvalModel) => Promise<TOutput>;
  /** Scorers; each has a name, an optional description, and a sync or async scoring function. */
  scorers: Array<{
    name: string;
    description?: string;
    /** Returns a numeric score (or a promise of one) for the given input/output/expected. */
    scorer: (args: {
      input: TInput;
      output: TOutput;
      expected?: TExpected;
    }) => number | Promise<number>;
  }>;
  /** Optional explicit list of models. Behavior when omitted is not shown here. */
  models?: EvalModel[];
  /** Optional list of model tiers (presumably used to select models — confirm). */
  tiers?: ModelTier[];
  /** Optional list of provider names (presumably used to select models — confirm). */
  providers?: string[];
  /** Optional concurrency setting; default and exact semantics are not shown here. */
  concurrency?: number;
  /** Custom output function for progress reporting (defaults to logger.info) */
  output?: EvalOutputFn;
  /** Whether to suppress progress output (defaults to false) */
  quiet?: boolean;
}

/**
 * Run an eval suite across models
 *
 * @param options - Eval name, cases, task, scorers, and optional model
 *   selection / concurrency / output settings (see `RunEvalOptions`).
 * @returns A promise resolving to an `EvalSummary`.
 */
export declare function runEval<TInput, TOutput, TExpected>(options: RunEvalOptions<TInput, TOutput, TExpected>): Promise<EvalSummary>;

// Re-export the generation helpers and schema builder imported above.
export { generateObject, generateText, schema };
//# sourceMappingURL=runner.d.ts.map