@arizeai/phoenix-client

A client for the Phoenix API

import { type DiagLogLevel } from "@arizeai/phoenix-otel";
import { ClientFn } from "../types/core";
import type { ExperimentEvaluatorLike } from "../types/experiments";
import { type Logger } from "../types/logger";
export type ResumeEvaluationParams = ClientFn & {
    /**
     * The ID of the experiment to resume evaluations for
     */
    readonly experimentId: string;
    /**
     * A single evaluator or list of evaluators to run on incomplete evaluations
     */
    readonly evaluators: ExperimentEvaluatorLike | readonly ExperimentEvaluatorLike[];
    /**
     * The logger to use
     * @default console
     */
    readonly logger?: Logger;
    /**
     * The number of concurrent evaluation executions
     * @default 5
     */
    readonly concurrency?: number;
    /**
     * Whether to set the global tracer provider when running evaluators.
     * @default true
     */
    readonly setGlobalTracerProvider?: boolean;
    /**
     * Whether to use batch span processor for tracing.
     * @default true
     */
    readonly useBatchSpanProcessor?: boolean;
    /**
     * Log level to set for the default DiagConsoleLogger when tracing.
     */
    readonly diagLogLevel?: DiagLogLevel;
    /**
     * Stop processing and exit as soon as any evaluation fails.
     * @default false
     */
    readonly stopOnFirstError?: boolean;
};
/**
 * Resume incomplete evaluations for an experiment.
 *
 * This function identifies which evaluations have not been completed (either missing or failed)
 * and runs the evaluators only for those runs. This is useful for:
 * - Recovering from transient evaluator failures
 * - Adding new evaluators to completed experiments
 * - Completing partially evaluated experiments
 *
 * The function processes incomplete evaluations in batches using pagination to minimize memory usage.
 *
 * Evaluation names are matched to evaluator names. For example, if you pass
 * an evaluator with name "accuracy", it will check for and resume any runs missing the "accuracy" evaluation.
 *
 * **Note:** Multi-output evaluators (evaluators that return an array of results) are not
 * supported for resume operations. Each evaluator should produce a single evaluation
 * result with a name matching the evaluator's name.
 *
 * @throws {Error} Throws different error types based on failure:
 * - "EvaluationFetchError": Unable to fetch incomplete evaluations from the server.
 *   Always thrown regardless of stopOnFirstError, as it indicates critical infrastructure failure.
 * - "EvaluationAbortedError": stopOnFirstError=true and an evaluator failed.
 *   Original error preserved in `cause` property.
 * - Generic Error: Other evaluator execution errors or unexpected failures.
 *
 * @example
 * ```ts
 * import { resumeEvaluation } from "@arizeai/phoenix-client/experiments";
 *
 * // Standard usage: evaluation name matches evaluator name
 * try {
 *   await resumeEvaluation({
 *     experimentId: "exp_123",
 *     evaluators: [{
 *       name: "correctness",
 *       kind: "CODE",
 *       evaluate: async ({ output, expected }) => ({
 *         score: output === expected ? 1 : 0
 *       })
 *     }],
 *   });
 * } catch (error) {
 *   // Handle by error name (no instanceof needed)
 *   if (error.name === "EvaluationFetchError") {
 *     console.error("Failed to connect to server:", error.cause);
 *   } else if (error.name === "EvaluationAbortedError") {
 *     console.error("Evaluation stopped due to error:", error.cause);
 *   } else {
 *     console.error("Unexpected error:", error);
 *   }
 * }
 *
 * // Stop on first error (useful for debugging)
 * await resumeEvaluation({
 *   experimentId: "exp_123",
 *   evaluators: [myEvaluator],
 *   stopOnFirstError: true, // Exit immediately on first failure
 * });
 * ```
 */
export declare function resumeEvaluation({ client: _client, experimentId, evaluators: _evaluators, logger, concurrency, setGlobalTracerProvider, useBatchSpanProcessor, diagLogLevel, stopOnFirstError, }: ResumeEvaluationParams): Promise<void>;
//# sourceMappingURL=resumeEvaluation.d.ts.map
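
For orientation, here is a minimal sketch of a backfill call against a self-hosted Phoenix instance, exercising the optional parameters above. It assumes createClient from the package root accepts a baseUrl option (as in the package README); the base URL, experiment ID, and scoring logic are illustrative, and the evaluator follows the same shape as the JSDoc example above.

import { createClient } from "@arizeai/phoenix-client";
import { resumeEvaluation } from "@arizeai/phoenix-client/experiments";

// Client for a self-hosted Phoenix instance (URL is illustrative).
const client = createClient({
  options: { baseUrl: "http://localhost:6006" },
});

await resumeEvaluation({
  client,
  experimentId: "exp_123", // illustrative ID
  // An evaluator added after the experiment finished; per the name-matching
  // rule, only runs missing a "conciseness" evaluation are processed.
  evaluators: [
    {
      name: "conciseness",
      kind: "CODE",
      evaluate: async ({ output }) => ({
        score: String(output).length <= 280 ? 1 : 0,
      }),
    },
  ],
  concurrency: 10, // raise from the default of 5 to speed up the backfill
  useBatchSpanProcessor: false, // flush spans eagerly in a short-lived script
});

Setting useBatchSpanProcessor to false trades tracing throughput for prompt span delivery, which suits one-off scripts that exit right after the call returns.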