UNPKG

evalite

Version:

Test your LLM-powered apps with a TypeScript-native, Vitest-based eval runner. No API key required.

165 lines 5.28 kB
import type { Db } from "./db.js";

/**
 * Shared type declarations for Evalite: server state, evaluation results,
 * scores, runner options, traces, and SDK response shapes.
 */
export declare namespace Evalite {
  /** The kind of run recorded on a running server state. */
  type RunType = "full" | "partial";

  /** Server state while a run is in progress; discriminated by `type: "running"`. */
  type RunningServerState = {
    type: "running";
    runType: RunType;
    filepaths: string[];
    /** May be `undefined`; ids are either `number` or `bigint`. */
    runId: number | bigint | undefined;
    evalNamesRunning: string[];
    resultIdsRunning: Array<number | bigint>;
  };

  /** Either a running state or the idle state; narrow on the `type` tag. */
  type ServerState =
    | RunningServerState
    | {
        type: "idle";
      };

  /** A value that may be supplied directly or wrapped in a promise. */
  type MaybePromise<T> = T | Promise<T>;

  /** Fields of a result that are available before input/expected are attached. */
  interface InitialResult {
    evalName: string;
    filepath: string;
    order: number;
  }

  /**
   * An {@link InitialResult} extended with `input` and `expected`.
   *
   * Both values are technically known when the evaluation starts, but they
   * may be files, and files have to be saved asynchronously. For that
   * reason they only show up on this later result shape.
   */
  interface ResultAfterFilesSaved extends InitialResult {
    input: unknown;
    expected?: unknown;
  }

  /** Outcome tag stored on a {@link Result}. */
  type ResultStatus = "success" | "fail";

  /** A label/value pair produced by the `columns` runner option. */
  type RenderedColumn = {
    label: string;
    value: unknown;
  };

  /**
   * The complete result shape: everything from
   * {@link ResultAfterFilesSaved} plus output, scores, timing, traces,
   * status and rendered columns.
   */
  interface Result extends ResultAfterFilesSaved {
    /** The output recorded for this result. */
    output: unknown;
    scores: Score[];
    /** NOTE(review): unit is not stated in this file — confirm with the runner. */
    duration: number;
    traces: Trace[];
    status: ResultStatus;
    renderedColumns: RenderedColumn[];
  }

  /** A named score attached to a result. */
  type Score = {
    /**
     * A number between 0 and 1.
     *
     * `null` is accepted for compatibility with
     * {@link https://github.com/braintrustdata/autoevals | autoevals};
     * a `null` score is reported as 0.
     */
    score: number | null;
    name: string;
    description?: string;
    metadata?: unknown;
  };

  /** Object form a `ScorerOpts.scorer` may return instead of a bare number. */
  type UserProvidedScoreWithMetadata = {
    score: number;
    metadata?: unknown;
  };

  /** Argument handed to scorers and to the column callbacks. */
  type ScoreInput<TInput, TOutput, TExpected> = {
    input: TInput;
    output: TOutput;
    expected?: TExpected;
  };

  /**
   * Metadata carrying up to three result shapes plus a duration.
   * NOTE(review): presumably attached to the underlying test task — confirm.
   */
  type TaskMeta = {
    initialResult?: InitialResult;
    resultAfterFilesSaved?: ResultAfterFilesSaved;
    result?: Result;
    duration: number | undefined;
  };

  /**
   * The function under evaluation. It may return its output directly, an
   * `AsyncIterable` of outputs, or a promise of either.
   */
  type Task<TInput, TOutput> = (
    input: TInput
  ) => MaybePromise<TOutput | AsyncIterable<TOutput>>;

  /** Function-style scorer that returns a full {@link Score}. */
  type Scorer<TInput, TOutput, TExpected> = (
    opts: ScoreInput<TInput, TOutput, TExpected>
  ) => MaybePromise<Score>;

  /** Options describing a single eval: its data, its task and its scorers. */
  type RunnerOpts<TInput, TOutput, TExpected> = {
    /** Supplies the cases to evaluate, synchronously or as a promise. */
    data: () => MaybePromise<
      Array<{
        input: TInput;
        expected?: TExpected;
      }>
    >;
    task: Task<TInput, TOutput>;
    /** Each entry is either a scorer function or a {@link ScorerOpts} object. */
    scorers: Array<
      | Scorer<TInput, TOutput, TExpected>
      | ScorerOpts<TInput, TOutput, TExpected>
    >;
    /**
     * @deprecated Use `columns` instead.
     */
    experimental_customColumns?: (
      opts: ScoreInput<TInput, TOutput, TExpected>
    ) => MaybePromise<RenderedColumn[]>;
    /** Builds the columns rendered for a result. */
    columns?: (
      opts: ScoreInput<TInput, TOutput, TExpected>
    ) => MaybePromise<RenderedColumn[]>;
  };

  /**
   * Object-style scorer: a name, an optional description and a `scorer`
   * callback that returns a number or a
   * {@link UserProvidedScoreWithMetadata}.
   */
  type ScorerOpts<TInput, TOutput, TExpected> = {
    name: string;
    description?: string;
    scorer: (
      input: ScoreInput<TInput, TOutput, TExpected>
    ) => MaybePromise<number | UserProvidedScoreWithMetadata>;
  };

  /** One traced call: input, output, optional token usage and start/end marks. */
  interface Trace {
    input: unknown;
    usage?: {
      promptTokens: number;
      completionTokens: number;
    };
    output: unknown;
    /** NOTE(review): presumably a timestamp; unit is not shown here — confirm. */
    start: number;
    /** NOTE(review): presumably a timestamp; unit is not shown here — confirm. */
    end: number;
  }

  /** A prompt message whose content is a plain string or a list of text parts. */
  type TracePrompt = {
    role: string;
    content: string | TracePromptTextContent[];
  };

  /** A text part of a {@link TracePrompt}. */
  type TracePromptTextContent = {
    type: "text";
    text: string;
  };

  /** File reference, recognisable by its `__EvaliteFile: true` marker. */
  type File = {
    __EvaliteFile: true;
    path: string;
  };

  /**
   * Result shapes grouped under the SDK name.
   * NOTE(review): the functions that return these are not visible in this
   * file — confirm usage against the SDK implementation.
   */
  namespace SDK {
    /** An evaluation, the previous one (if any) and a score history. */
    type GetEvalByNameResult = {
      history: Array<{
        score: number;
        date: string;
      }>;
      evaluation: Db.Eval & {
        results: Array<
          Db.Result & {
            scores: Db.Score[];
          }
        >;
      };
      prevEvaluation:
        | (Db.Eval & {
            results: Array<
              Db.Result & {
                scores: Db.Score[];
              }
            >;
          })
        | undefined;
    };

    /** A single eval entry inside {@link GetMenuItemsResult}. */
    type GetMenuItemsResultEval = {
      filepath: string;
      score: number;
      name: string;
      prevScore: number | undefined;
      evalStatus: Db.EvalStatus;
    };

    /** The list of eval entries plus a top-level score, previous score and status. */
    type GetMenuItemsResult = {
      evals: GetMenuItemsResultEval[];
      score: number;
      prevScore: number | undefined;
      evalStatus: Db.EvalStatus;
    };

    /** A result with traces and scores, the previous result (if any) and its eval. */
    type GetResultResult = {
      result: Db.Result & {
        traces: Db.Trace[];
        score: number;
        scores: Db.Score[];
      };
      prevResult:
        | (Db.Result & {
            score: number;
            scores: Db.Score[];
          })
        | undefined;
      evaluation: Db.Eval;
    };
  }
}
//# sourceMappingURL=types.d.ts.map