@arizeai/phoenix-client
A client for the Phoenix API
TypeScript
import type { LLMEvaluator } from "@arizeai/phoenix-evals";
import { AnnotatorKind } from "./annotations.js";
import { Node } from "./core.js";
import { Example, ExampleWithId } from "./datasets.js";
/**
* An experiment is a set of task runs on a dataset version
*/
export interface ExperimentInfo extends Node {
  datasetId: string;
  datasetVersionId: string;
  datasetSplits?: string[];
  /**
   * Number of times the experiment is repeated
   */
  repetitions: number;
  /**
   * Metadata about the experiment as an object of key-value pairs,
   * e.g. model name
   */
  metadata: Record<string, unknown>;
  /**
   * The project under which the experiment task traces are recorded
   * Note: This can be null when no project is associated with the experiment
   */
  projectName: string | null;
  /**
   * The creation timestamp of the experiment
   */
  createdAt: string;
  /**
   * The last update timestamp of the experiment
   */
  updatedAt: string;
  /**
   * Number of examples in the experiment
   */
  exampleCount: number;
  /**
   * Number of successful runs in the experiment
   */
  successfulRunCount: number;
  /**
   * Number of failed runs in the experiment
   */
  failedRunCount: number;
  /**
   * Number of missing (not yet executed) runs in the experiment
   */
  missingRunCount: number;
}
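/*
 * Illustrative sketch (not part of the shipped declarations): a plausible
 * ExperimentInfo value, assuming the Node base type contributes a string `id`.
 * All field values below are invented for illustration only.
 *
 *   const info: ExperimentInfo = {
 *     id: "exp-123",
 *     datasetId: "ds-42",
 *     datasetVersionId: "dsv-7",
 *     repetitions: 1,
 *     metadata: { model: "gpt-4o-mini" },
 *     projectName: "my-experiment-project",
 *     createdAt: "2024-01-01T00:00:00Z",
 *     updatedAt: "2024-01-01T00:05:00Z",
 *     exampleCount: 10,
 *     successfulRunCount: 9,
 *     failedRunCount: 1,
 *     missingRunCount: 0,
 *   };
 */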
export type ExperimentRunID = string;
/**
* Represents incomplete experiment runs for a dataset example
* Groups all incomplete repetitions for a single example
*/
export interface IncompleteRun {
  /**
   * The dataset example that has incomplete runs
   */
  datasetExample: Example;
  /**
   * List of repetition numbers that need to be run for this example
   */
  repetitionNumbers: number[];
}
export interface IncompleteEvaluation {
  /**
   * The experiment run with incomplete evaluations
   */
  experimentRun: ExperimentRun;
  /**
   * The dataset example for this run
   */
  datasetExample: ExampleWithId;
  /**
   * List of evaluation names that are incomplete (either missing or failed)
   */
  evaluationNames: string[];
}
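/*
 * Illustrative sketch (invented values): with `repetitions: 3`, an example
 * whose second and third repetitions never executed would surface as
 *
 *   const incomplete: IncompleteRun = {
 *     datasetExample: someExample, // placeholder for an Example from the dataset
 *     repetitionNumbers: [2, 3],   // repetitions still to execute
 *   };
 *
 * and a run whose "conciseness" evaluation failed or is missing would surface
 * as an IncompleteEvaluation with `evaluationNames: ["conciseness"]`.
 */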
/**
 * A map of experiment run IDs to their corresponding runs
 */
export interface ExperimentRunsMap {
  runs: Record<ExperimentRunID, ExperimentRun>;
}
/**
 * An experiment that has been run and recorded on the server
 */
export interface RanExperiment extends ExperimentInfo, ExperimentRunsMap {
  evaluationRuns?: ExperimentEvaluationRun[];
}
/**
* The result of running an experiment on a single example
*/
export interface ExperimentRun extends Node {
  startTime: Date;
  endTime: Date;
  /**
   * The experiment that the run belongs to
   */
  experimentId: string;
  datasetExampleId: string;
  output?: string | boolean | number | object | null;
  error: string | null;
  traceId: string | null;
}
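/*
 * Illustrative sketch (invented values): a successful run and how it is keyed
 * inside an ExperimentRunsMap, again assuming the Node base type contributes a
 * string `id`.
 *
 *   const run: ExperimentRun = {
 *     id: "run-1",
 *     startTime: new Date("2024-01-01T00:00:00Z"),
 *     endTime: new Date("2024-01-01T00:00:03Z"),
 *     experimentId: "exp-123",
 *     datasetExampleId: "ex-1",
 *     output: "The capital of France is Paris.",
 *     error: null,
 *     traceId: "a1b2c3d4",
 *   };
 *
 *   const runsMap: ExperimentRunsMap = { runs: { [run.id]: run } };
 */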
export type EvaluatorParams<TaskOutputType = TaskOutput> = {
  /**
   * The input field of the Dataset Example
   */
  input: Example["input"];
  /**
   * The output of the task
   */
  output: TaskOutputType;
  /**
   * The expected or reference output of the Dataset Example
   */
  expected?: Example["output"];
  /**
   * Metadata associated with the Dataset Example
   */
  metadata?: Example["metadata"];
};
export type Evaluator = {
  name: string;
  kind: AnnotatorKind;
  evaluate: (args: EvaluatorParams) => Promise<EvaluationResult> | EvaluationResult;
};
export type EvaluationResult = {
  score?: number | null;
  label?: string | null;
  metadata?: Record<string, unknown>;
  explanation?: string | null;
};
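/*
 * Illustrative sketch (not part of the shipped declarations): a minimal custom
 * Evaluator that compares the task output to the expected output by
 * stringified equality. Assumes AnnotatorKind includes "CODE"; the name and
 * comparison strategy are invented for illustration.
 *
 *   const exactMatch: Evaluator = {
 *     name: "exact-match",
 *     kind: "CODE",
 *     evaluate: ({ output, expected }) => {
 *       const matched =
 *         JSON.stringify(output ?? null) === JSON.stringify(expected ?? null);
 *       return {
 *         score: matched ? 1 : 0,
 *         label: matched ? "match" : "no-match",
 *         explanation: matched
 *           ? "Task output exactly matches the expected output"
 *           : "Task output differs from the expected output",
 *       };
 *     },
 *   };
 */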
export interface ExperimentEvaluationRun extends Node {
  experimentRunId: string;
  startTime: Date;
  endTime: Date;
  /**
   * The name of the evaluation
   */
  name: string;
  annotatorKind: AnnotatorKind;
  error: string | null;
  result: EvaluationResult | null;
  /**
   * The trace id of the evaluation
   * This is null if the trace was deleted or never recorded
   */
  traceId: string | null;
}
export type TaskOutput = string | boolean | number | object | null;
export type ExperimentTask = (example: Example) => Promise<TaskOutput> | TaskOutput;
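/*
 * Illustrative sketch (not part of the shipped declarations): a minimal
 * ExperimentTask. It assumes the example input is a plain record with a
 * `question` field; a real task would typically call an LLM or application
 * code here instead of echoing the question back.
 *
 *   const task: ExperimentTask = async (example) => {
 *     const question = String(example.input.question ?? "");
 *     return `You asked: ${question}`;
 *   };
 */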
export interface ExperimentParameters {
  /**
   * The number of examples to run the experiment on
   */
  nExamples: number;
}
/**
 * Represents any kind of evaluator that can be used in an experiment.
 * The `Record<string, unknown>` parameter captures evaluators from external libraries such as phoenix-evals.
*/
export type ExperimentEvaluatorLike = Evaluator | LLMEvaluator<Record<string, unknown>>;
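/*
 * Illustrative sketch tying the pieces together. The `runExperiment` helper,
 * its import path, and its option names below are assumptions about other
 * modules in this package and are not defined in this file.
 *
 *   import { runExperiment } from "@arizeai/phoenix-client/experiments";
 *
 *   const ranExperiment: RanExperiment = await runExperiment({
 *     dataset: { datasetId: "ds-42" },
 *     task,                     // the ExperimentTask sketched above
 *     evaluators: [exactMatch], // ExperimentEvaluatorLike[]
 *   });
 */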
//# sourceMappingURL=experiments.d.ts.map