evalite
Test your LLM-powered apps with a TypeScript-native, Vitest-based eval runner. No API key required.
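A minimal eval file, sketched against the RunnerOpts type declared below. The evalite() entry point and the .eval.ts file naming are assumed from the package's documented usage rather than from this declaration file, and the task is a plain string transform standing in for a real model call:

// greeting.eval.ts (sketch)
import { evalite } from "evalite";

evalite("Greeting eval", {
  data: async () => [{ input: "Hello", expected: "Hello World!" }],
  task: async (input) => {
    // Call your model here; string concatenation keeps the sketch self-contained.
    return input + " World!";
  },
  scorers: [
    // A function-style scorer returns a full Score ({ name, score }).
    ({ output, expected }) => ({
      name: "Exact match",
      score: output === expected ? 1 : 0,
    }),
  ],
});

The full type declarations from types.d.ts follow.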
TypeScript
import type { Db } from "./db.js";
export declare namespace Evalite {
type RunType = "full" | "partial";
type RunningServerState = {
type: "running";
runType: RunType;
filepaths: string[];
runId: number | bigint | undefined;
evalNamesRunning: string[];
resultIdsRunning: (number | bigint)[];
};
type ServerState = RunningServerState | {
type: "idle";
};
type MaybePromise<T> = T | Promise<T>;
interface InitialResult {
evalName: string;
filepath: string;
order: number;
}
interface ResultAfterFilesSaved extends InitialResult {
/**
* Technically, input and expected are known at the start
* of the evaluation. But because they may be files, they
* need to be saved asynchronously.
*
* This is why they are only included in the final result.
*/
input: unknown;
expected?: unknown;
}
type ResultStatus = "success" | "fail";
type RenderedColumn = {
label: string;
value: unknown;
};
interface Result extends ResultAfterFilesSaved {
/**
* The output, scores, traces, and duration are only
* known once the task and its scorers have finished
* running.
*
* This is why they only appear on this final result.
*/
output: unknown;
scores: Score[];
duration: number;
traces: Trace[];
status: ResultStatus;
renderedColumns: RenderedColumn[];
}
type Score = {
/**
* A number between 0 and 1.
*
* Added null for compatibility with {@link https://github.com/braintrustdata/autoevals | autoevals}.
* null scores will be reported as 0.
*/
score: number | null;
name: string;
description?: string;
metadata?: unknown;
};
type UserProvidedScoreWithMetadata = {
score: number;
metadata?: unknown;
};
type ScoreInput<TInput, TOutput, TExpected> = {
input: TInput;
output: TOutput;
expected?: TExpected;
};
type TaskMeta = {
initialResult?: InitialResult;
resultAfterFilesSaved?: ResultAfterFilesSaved;
result?: Result;
duration: number | undefined;
};
type Task<TInput, TOutput> = (input: TInput) => MaybePromise<TOutput | AsyncIterable<TOutput>>;
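/**
 * Example (a sketch, not part of this file): because a Task may return an
 * AsyncIterable, a task can stream its output chunk by chunk, e.g. from a
 * streaming LLM response. The word-splitting below is purely illustrative.
 *
 *   const streamingTask: Evalite.Task<string, string> = async function* (input) {
 *     for (const word of input.split(" ")) {
 *       yield word + " ";
 *     }
 *   };
 */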
type Scorer<TInput, TOutput, TExpected> = (opts: ScoreInput<TInput, TOutput, TExpected>) => MaybePromise<Score>;
type RunnerOpts<TInput, TOutput, TExpected> = {
data: () => MaybePromise<{
input: TInput;
expected?: TExpected;
}[]>;
task: Task<TInput, TOutput>;
scorers: Array<Scorer<TInput, TOutput, TExpected> | ScorerOpts<TInput, TOutput, TExpected>>;
/**
* @deprecated Use `columns` instead.
*/
experimental_customColumns?: (opts: ScoreInput<TInput, TOutput, TExpected>) => MaybePromise<RenderedColumn[]>;
columns?: (opts: ScoreInput<TInput, TOutput, TExpected>) => MaybePromise<RenderedColumn[]>;
};
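/**
 * Example (a sketch): the `columns` option above customizes the columns the
 * UI renders for each result, returning RenderedColumn objects. The labels
 * are illustrative.
 *
 *   columns: ({ input, output }) => [
 *     { label: "Prompt", value: input },
 *     { label: "Answer", value: output },
 *   ],
 */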
type ScorerOpts<TInput, TOutput, TExpected> = {
name: string;
description?: string;
scorer: (input: Evalite.ScoreInput<TInput, TOutput, TExpected>) => Evalite.MaybePromise<number | Evalite.UserProvidedScoreWithMetadata>;
};
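/**
 * Example (a sketch): an object-style scorer built on ScorerOpts above. Its
 * `scorer` may return either a bare number or a UserProvidedScoreWithMetadata;
 * this one attaches metadata alongside the score.
 *
 *   const exactMatch: Evalite.ScorerOpts<string, string, string> = {
 *     name: "Exact match",
 *     description: "1 when the output equals the expected string.",
 *     scorer: ({ output, expected }) => ({
 *       score: output === expected ? 1 : 0,
 *       metadata: { output, expected },
 *     }),
 *   };
 */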
interface Trace {
input: unknown;
usage?: {
promptTokens: number;
completionTokens: number;
};
output: unknown;
start: number;
end: number;
}
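/**
 * Example (a sketch): a Trace value recording a single model call, with
 * start and end as millisecond timestamps. The numbers are illustrative.
 *
 *   const trace: Evalite.Trace = {
 *     input: [{ role: "user", content: "Hello" }],
 *     output: "Hello World!",
 *     usage: { promptTokens: 12, completionTokens: 4 },
 *     start: 0,
 *     end: 850,
 *   };
 */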
type TracePrompt = {
role: string;
content: TracePromptTextContent[] | string;
};
type TracePromptTextContent = {
type: "text";
text: string;
};
type File = {
__EvaliteFile: true;
path: string;
};
namespace SDK {
type GetEvalByNameResult = {
history: {
score: number;
date: string;
}[];
evaluation: Db.Eval & {
results: (Db.Result & {
scores: Db.Score[];
})[];
};
prevEvaluation: (Db.Eval & {
results: (Db.Result & {
scores: Db.Score[];
})[];
}) | undefined;
};
type GetMenuItemsResultEval = {
filepath: string;
score: number;
name: string;
prevScore: number | undefined;
evalStatus: Db.EvalStatus;
};
type GetMenuItemsResult = {
evals: GetMenuItemsResultEval[];
score: number;
prevScore: number | undefined;
evalStatus: Db.EvalStatus;
};
type GetResultResult = {
result: Db.Result & {
traces: Db.Trace[];
score: number;
scores: Db.Score[];
};
prevResult: (Db.Result & {
score: number;
scores: Db.Score[];
}) | undefined;
evaluation: Db.Eval;
};
}
}
//# sourceMappingURL=types.d.ts.map