parea-ai
Version:
Client SDK library to connect to Parea AI.
104 lines (103 loc) • 3.94 kB
TypeScript
import { EvalFunctionReturn, EvaluatedLog, EvaluationResult, ExperimentStatus } from '../types';
/**
 * Shape of the values held in an experiment context.
 */
export type ExperimentContextValues = {
  /** Evaluated logs held in the context. */
  logs: EvaluatedLog[];
  /** Evaluation results (scores) held in the context. */
  scores: EvaluationResult[];
};
/**
 * A traced function that can be used in experiments.
 *
 * The function is called positionally: zero or more leading arguments, each
 * typed as one of the property values of `T`, followed by a final argument
 * typed as `T['target']`. It may return its result directly or as a promise.
 *
 * @template T - Record describing the function parameters
 * @template R - The (possibly awaited) return type of the function
 */
export type TracedFunction<T extends Record<string, any>, R> = (
  ...args: [...T[keyof T][], T['target']]
) => R | Promise<R>;
/**
 * Options for configuring an experiment. Every option may be omitted.
 */
export interface ExperimentOptions {
  /** The number of trials to run. */
  nTrials?: number;
  /** Additional metadata for the experiment. */
  metadata?: Record<string, any>;
  /** The number of workers to use for parallel execution. */
  nWorkers?: number;
  /**
   * Evaluation functions to run on the entire dataset; each one receives the
   * evaluated logs as an array.
   */
  datasetLevelEvalFuncs?: Array<(logs: EvaluatedLog[]) => EvalFunctionReturn>;
  /**
   * The maximum number of retries to wait for eval to finish. Each retry
   * waits for 1s. Default is 60.
   */
  maxRetries?: number;
}
/**
 * Options for configuring a single experiment run. Both fields are optional.
 */
export interface RunOptions {
  /** A name for this specific run of the experiment. */
  runName?: string;
  /** A prefix for the experiment run name. */
  prefix?: string;
}
/**
 * The result of a single trial in an experiment.
 *
 * @template T - The type of the trial input
 * @template R - The type of the trial output
 */
export declare class TrialResult<T, R> {
  /** The input of the trial. */
  input: T;
  /** The output of the trial; `null` if an error occurred. */
  output: R | null;
  /** The error that occurred during the trial; `null` if it was successful. */
  error: Error | null;
  /** The status of the trial. */
  state: ExperimentStatus;
  /** The evaluation scores of the trial; `null` if not available. */
  scores: EvaluationResult[] | null;
  /** The logs generated during the trial; `null` if not available. */
  logs: EvaluatedLog[] | null;

  /**
   * Creates a new TrialResult instance.
   *
   * @param input - The input of the trial
   * @param output - The output of the trial (null if an error occurred)
   * @param error - The error raised during the trial (null if successful)
   * @param state - The status of the trial
   * @param scores - The evaluation scores of the trial (null if not available)
   * @param logs - The logs generated during the trial (null if not available)
   */
  constructor(
    input: T,
    output: R | null,
    error: Error | null,
    state: ExperimentStatus,
    scores: EvaluationResult[] | null,
    logs: EvaluatedLog[] | null,
  );
}
/**
 * The aggregated results of an experiment: a named collection of trial
 * results plus optional metadata.
 *
 * This is a declaration only; the method bodies live in the implementation
 * file, so the method notes below describe the documented contract.
 *
 * @template T - The type of the input parameters
 * @template R - The type of the output
 */
export declare class ExperimentResult<T extends Record<string, any>, R> {
  /** The name of the experiment. */
  name: string;
  /** The individual trial results. */
  results: TrialResult<T, R>[];
  /** Additional metadata for the experiment, if any. */
  metadata: Record<string, any> | undefined;

  /**
   * Creates a new ExperimentResult instance.
   *
   * @param name - The name of the experiment
   * @param results - An array of TrialResult instances
   * @param metadata - Additional metadata for the experiment (may be undefined)
   */
  constructor(
    name: string,
    results: TrialResult<T, R>[],
    metadata: Record<string, any> | undefined,
  );

  /**
   * Calculates the success rate of the experiment.
   *
   * NOTE(review): documented as a percentage of successful trials; the scale
   * (0-100 vs. 0-1) is not visible from this declaration — confirm against
   * the implementation.
   *
   * @returns The percentage of successful trials
   */
  getSuccessRate(): number;

  /**
   * Retrieves all logs from successful trials.
   *
   * @returns An array of EvaluatedLog objects
   */
  getLogs(): EvaluatedLog[];

  /**
   * Retrieves all errors from failed trials.
   *
   * @returns An array of Error objects
   */
  getErrors(): Error[];

  /**
   * Retrieves the error messages from all failed trials as one string.
   *
   * @returns A string containing all error messages, separated by commas
   */
  getErrorsString(): string;

  /**
   * Calculates the average scores across all trials.
   *
   * @returns An object containing the average score for each evaluation
   * metric (presumably keyed by metric name — confirm against the
   * implementation)
   */
  getAverageScores(): Record<string, number>;
}