/**
 * @arizeai/phoenix-client
 * A client for the Phoenix API.
 * Type declarations for the resumeEvaluation experiment helper.
 */
import { type DiagLogLevel } from "@arizeai/phoenix-otel";
import { ClientFn } from "../types/core";
import type { ExperimentEvaluatorLike } from "../types/experiments";
import { type Logger } from "../types/logger";
/**
 * Options accepted by {@link resumeEvaluation}, layered on top of the
 * shared {@link ClientFn} client-injection shape.
 */
export type ResumeEvaluationParams = ClientFn & {
  /** ID of the experiment whose incomplete evaluations should be resumed. */
  readonly experimentId: string;
  /**
   * One evaluator — or a list of evaluators — to run against the
   * evaluations that have not yet completed.
   */
  readonly evaluators: ExperimentEvaluatorLike | readonly ExperimentEvaluatorLike[];
  /**
   * Destination for progress/diagnostic output.
   * @default console
   */
  readonly logger?: Logger;
  /**
   * Maximum number of evaluation executions run in parallel.
   * @default 5
   */
  readonly concurrency?: number;
  /**
   * When true, registers the tracer provider globally while evaluators run.
   * @default true
   */
  readonly setGlobalTracerProvider?: boolean;
  /**
   * When true, spans are exported through a batch span processor.
   * @default true
   */
  readonly useBatchSpanProcessor?: boolean;
  /** Log level applied to the default DiagConsoleLogger used for tracing. */
  readonly diagLogLevel?: DiagLogLevel;
  /**
   * When true, abort the run and throw as soon as any evaluation fails.
   * @default false
   */
  readonly stopOnFirstError?: boolean;
};
/**
 * Resume the incomplete evaluations of an experiment.
 *
 * Determines which evaluations are missing or failed for the experiment's
 * runs and executes the supplied evaluators only against those runs.
 * Incomplete evaluations are fetched and processed in paginated batches to
 * keep memory usage low.
 *
 * Typical uses:
 * - Recovering from transient evaluator failures
 * - Adding new evaluators to an already-completed experiment
 * - Finishing a partially evaluated experiment
 *
 * Matching is by name: an evaluator named `"accuracy"` resumes exactly the
 * runs that lack an `"accuracy"` evaluation.
 *
 * **Note:** Multi-output evaluators (those returning an array of results)
 * are not supported here — each evaluator must produce a single evaluation
 * result whose name matches the evaluator's name.
 *
 * @throws {Error} Distinguished by the error's `name` property:
 * - `"EvaluationFetchError"` — incomplete evaluations could not be fetched
 *   from the server. Thrown regardless of `stopOnFirstError`, since it
 *   signals a critical infrastructure failure.
 * - `"EvaluationAbortedError"` — `stopOnFirstError` was true and an
 *   evaluator failed; the original error is kept on `cause`.
 * - Generic `Error` — any other evaluator execution or unexpected failure.
 *
 * @example
 * ```ts
 * import { resumeEvaluation } from "@arizeai/phoenix-client/experiments";
 *
 * // Evaluation names are matched to evaluator names.
 * try {
 *   await resumeEvaluation({
 *     experimentId: "exp_123",
 *     evaluators: [{
 *       name: "correctness",
 *       kind: "CODE",
 *       evaluate: async ({ output, expected }) => ({
 *         score: output === expected ? 1 : 0,
 *       }),
 *     }],
 *   });
 * } catch (error) {
 *   // Dispatch on error.name — no instanceof required.
 *   if (error.name === "EvaluationFetchError") {
 *     console.error("Failed to connect to server:", error.cause);
 *   } else if (error.name === "EvaluationAbortedError") {
 *     console.error("Evaluation stopped due to error:", error.cause);
 *   } else {
 *     console.error("Unexpected error:", error);
 *   }
 * }
 *
 * // Fail fast while debugging:
 * await resumeEvaluation({
 *   experimentId: "exp_123",
 *   evaluators: [myEvaluator],
 *   stopOnFirstError: true, // exit immediately on the first failure
 * });
 * ```
 */
export declare function resumeEvaluation(params: ResumeEvaluationParams): Promise<void>;
//# sourceMappingURL=resumeEvaluation.d.ts.map