UNPKG

parea-ai

Version:

Client SDK library to connect to Parea AI.

104 lines (103 loc) 3.94 kB
import { EvalFunctionReturn, EvaluatedLog, EvaluationResult, ExperimentStatus } from '../types';

/**
 * Shape of the values held in an experiment context: the evaluated logs
 * together with the evaluation results (scores).
 */
export type ExperimentContextValues = {
  logs: EvaluatedLog[];
  scores: EvaluationResult[];
};

/**
 * A traced function that can be run as part of an experiment.
 *
 * The argument list is a variadic run of values taken from the property types
 * of `T`, followed by one final argument typed as `T['target']`. The function
 * may return its result directly or wrapped in a promise.
 *
 * @template T - The type of the function parameters
 * @template R - The return type of the function
 */
export type TracedFunction<T extends Record<string, any>, R> = (
  ...args: [...Array<T[keyof T]>, T['target']]
) => R | Promise<R>;

/**
 * Settings that configure an experiment. Every field is optional.
 */
export interface ExperimentOptions {
  /** The number of trials to run. */
  nTrials?: number;
  /** Additional metadata for the experiment. */
  metadata?: Record<string, any>;
  /** The number of workers to use for parallel execution. */
  nWorkers?: number;
  /** Evaluation functions to run on the entire dataset. */
  datasetLevelEvalFuncs?: Array<(logs: EvaluatedLog[]) => EvalFunctionReturn>;
  /**
   * The maximum number of retries to wait for eval to finish.
   * Each retry waits for 1s. Default is 60.
   */
  maxRetries?: number;
}

/**
 * Settings that configure a single run of an experiment. Every field is optional.
 */
export interface RunOptions {
  /** A name for this specific run of the experiment. */
  runName?: string;
  /** A prefix for the experiment run name. */
  prefix?: string;
}

/**
 * The outcome of one trial within an experiment.
 *
 * @template T - The type of the input
 * @template R - The type of the output
 */
export declare class TrialResult<T, R> {
  /** The input of the trial. */
  input: T;
  /** The output of the trial (null if an error occurred). */
  output: R | null;
  /** The error that occurred during the trial (null if successful). */
  error: Error | null;
  /** The status of the trial. */
  state: ExperimentStatus;
  /** The evaluation scores of the trial (null if not available). */
  scores: EvaluationResult[] | null;
  /** The logs generated during the trial (null if not available). */
  logs: EvaluatedLog[] | null;

  /**
   * Builds a new TrialResult.
   *
   * @param input - The input of the trial
   * @param output - The output of the trial (null if error occurred)
   * @param error - The error that occurred during the trial (null if successful)
   * @param state - The status of the trial
   * @param scores - The evaluation scores of the trial (null if not available)
   * @param logs - The logs generated during the trial (null if not available)
   */
  constructor(
    input: T,
    output: R | null,
    error: Error | null,
    state: ExperimentStatus,
    scores: EvaluationResult[] | null,
    logs: EvaluatedLog[] | null,
  );
}

/**
 * The aggregated results of an experiment.
 *
 * @template T - The type of the input parameters
 * @template R - The type of the output
 */
export declare class ExperimentResult<T extends Record<string, any>, R> {
  /** The name of the experiment. */
  name: string;
  /** The individual trial results that make up the experiment. */
  results: Array<TrialResult<T, R>>;
  /** Additional metadata for the experiment, if any was supplied. */
  metadata: Record<string, any> | undefined;

  /**
   * Builds a new ExperimentResult.
   *
   * @param name - The name of the experiment
   * @param results - An array of TrialResult instances
   * @param metadata - Additional metadata for the experiment
   */
  constructor(
    name: string,
    results: Array<TrialResult<T, R>>,
    metadata: Record<string, any> | undefined,
  );

  /**
   * Calculates the success rate of the experiment.
   *
   * @returns The percentage of successful trials
   */
  getSuccessRate(): number;

  /**
   * Retrieves all logs from successful trials.
   *
   * @returns An array of EvaluatedLog objects
   */
  getLogs(): EvaluatedLog[];

  /**
   * Retrieves all errors from failed trials.
   *
   * @returns An array of Error objects
   */
  getErrors(): Error[];

  /**
   * Retrieves error messages from all failed trials.
   *
   * @returns A string containing all error messages, separated by commas
   */
  getErrorsString(): string;

  /**
   * Calculates the average scores across all trials.
   *
   * @returns An object containing average scores for each evaluation metric
   */
  getAverageScores(): Record<string, number>;
}