UNPKG

@arizeai/phoenix-client

Version:

A client for the Phoenix API

540 lines • 23 kB
"use strict";
// TypeScript downlevel helper used to drive `for await...of` over the task channel.
var __asyncValues = (this && this.__asyncValues) || function (o) {
    if (!Symbol.asyncIterator) throw new TypeError("Symbol.asyncIterator is not defined.");
    var m = o[Symbol.asyncIterator], i;
    return m ? m.call(o) : (o = typeof __values === "function" ? __values(o) : o[Symbol.iterator](), i = {}, verb("next"), verb("throw"), verb("return"), i[Symbol.asyncIterator] = function () { return this; }, i);
    function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
    function settle(resolve, reject, d, v) { Promise.resolve(v).then(function (v) { resolve({ value: v, done: d }); }, reject); }
};
// TypeScript downlevel helper for interop with default exports.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.resumeExperiment = resumeExperiment;
const openinference_semantic_conventions_1 = require("@arizeai/openinference-semantic-conventions");
const phoenix_otel_1 = require("@arizeai/phoenix-otel");
const client_1 = require("../client");
const channel_1 = require("../utils/channel");
const ensureString_1 = require("../utils/ensureString");
const isHttpError_1 = require("../utils/isHttpError");
const toObjectHeaders_1 = require("../utils/toObjectHeaders");
const urlUtils_1 = require("../utils/urlUtils");
const getExperimentInfo_js_1 = require("./getExperimentInfo.js");
const resumeEvaluation_1 = require("./resumeEvaluation");
const tiny_invariant_1 = __importDefault(require("tiny-invariant"));
/**
 * Error thrown when task is aborted due to a failure in stopOnFirstError mode.
 * This provides semantic context that the abort was intentional, not an infrastructure failure.
 * @internal - Not exported to minimize API surface area
 */
class TaskAbortedError extends Error {
    constructor(message, cause) {
        super(message);
        this.name = "TaskAbortedError";
        this.cause = cause;
    }
}
/**
 * Error thrown when the producer fails to fetch incomplete runs from the server.
 * This is a critical error that should always be surfaced, even in stopOnFirstError=false mode.
 * @internal - Not exported to minimize API surface area
 */
class TaskFetchError extends Error {
    constructor(message, cause) {
        super(message);
        this.name = "TaskFetchError";
        this.cause = cause;
    }
}
const DEFAULT_PAGE_SIZE = 50;
/**
 * Channel capacity multiplier for producer-consumer buffering.
 * A value of 2 enables pipeline efficiency: workers process batch N while
 * the producer fetches batch N+1, eliminating idle time without excessive
 * memory usage. The channel blocks when full, providing natural backpressure.
 */
const CHANNEL_CAPACITY_MULTIPLIER = 2;
/**
 * Transforms API incomplete run response to ExampleWithId
 */
function buildExampleFromApiResponse(apiExample) {
    return {
        id: apiExample.id,
        input: apiExample.input,
        output: apiExample.output || null,
        metadata: apiExample.metadata || {},
        updatedAt: new Date(apiExample.updated_at),
    };
}
/**
 * Handles fetch errors with helpful version information for unsupported features
 */
async function handleFetchError(error, client, featureName) {
    // Check if this is a JSON parse error (likely 404 HTML response from old server)
    const isJsonError = error instanceof SyntaxError && error.message.toLowerCase().includes("json");
    if (isJsonError) {
        // Fetch server version to provide helpful context
        let versionInfo = "";
        try {
            const baseUrl = client.config.baseUrl || "";
            const versionRes = await fetch(`${baseUrl}/arize_phoenix_version`);
            if (versionRes.ok) {
                const version = await versionRes.text();
                versionInfo = ` Your current server version is ${version}.`;
            }
        }
        catch (_a) {
            // Ignore errors fetching version
        }
        throw new Error(`The ${featureName} feature is not available on this Phoenix server. ` +
            "Please upgrade your Phoenix server to use this feature." +
            versionInfo);
    }
    throw error;
}
/**
 * Sets up OpenTelemetry tracer for experiment tracing
 */
function setupTracer({ projectName, baseUrl, headers, useBatchSpanProcessor, diagLogLevel, setGlobalTracerProvider, }) {
    if (!projectName) {
        return null;
    }
    const provider = (0, phoenix_otel_1.register)({
        projectName,
        url: baseUrl,
        headers,
        batch: useBatchSpanProcessor,
        diagLogLevel,
        global: setGlobalTracerProvider,
    });
    const tracer = provider.getTracer(projectName);
    return { provider, tracer };
}
/**
 * Prints experiment summary to logger
 */
function printExperimentSummary({ logger, experimentId, totalProcessed, totalCompleted, }) {
    logger.info("\n" + "=".repeat(70));
    logger.info("๐Ÿ“Š Experiment Resume Summary");
    logger.info("=".repeat(70));
    logger.info(`Experiment ID: ${experimentId}`);
    logger.info(`Incomplete runs processed: ${totalProcessed}`);
    logger.info(`Successfully completed: ${totalCompleted}`);
    logger.info("=".repeat(70));
}
/**
 * Resume an incomplete experiment by running only the missing or failed runs.
 *
 * This function identifies which (example, repetition) pairs have not been completed
 * (either missing or failed) and re-runs the task only for those pairs. Optionally,
 * evaluators can be run on the completed runs after task execution.
 *
 * The function processes incomplete runs in batches using pagination to minimize memory usage.
 *
 * @throws {Error} Throws different error types based on failure:
 * - "TaskFetchError": Unable to fetch incomplete runs from the server.
 *   Always thrown regardless of stopOnFirstError, as it indicates critical infrastructure failure.
 * - "TaskAbortedError": stopOnFirstError=true and a task failed.
 *   Original error preserved in `cause` property.
 * - Generic Error: Other task execution errors or unexpected failures.
 *
 * @example
 * ```ts
 * import { resumeExperiment } from "@arizeai/phoenix-client/experiments";
 *
 * // Resume an interrupted experiment
 * try {
 *   await resumeExperiment({
 *     experimentId: "exp_123",
 *     task: myTask,
 *   });
 * } catch (error) {
 *   // Handle by error name (no instanceof needed)
 *   if (error.name === "TaskFetchError") {
 *     console.error("Failed to connect to server:", error.cause);
 *   } else if (error.name === "TaskAbortedError") {
 *     console.error("Task stopped due to error:", error.cause);
 *   } else {
 *     console.error("Unexpected error:", error);
 *   }
 * }
 *
 * // Resume with evaluators
 * await resumeExperiment({
 *   experimentId: "exp_123",
 *   task: myTask,
 *   evaluators: [correctnessEvaluator, relevanceEvaluator],
 * });
 *
 * // Stop on first error (useful for debugging)
 * await resumeExperiment({
 *   experimentId: "exp_123",
 *   task: myTask,
 *   stopOnFirstError: true, // Exit immediately on first task failure
 * });
 * ```
 */
async function resumeExperiment({ client: _client, experimentId, task, evaluators, logger = console, concurrency = 5, setGlobalTracerProvider = true, useBatchSpanProcessor = true, diagLogLevel, stopOnFirstError = false, }) {
    var _a, _b;
    const client = _client !== null && _client !== void 0 ? _client : (0, client_1.createClient)();
    const pageSize = DEFAULT_PAGE_SIZE;
    // Get experiment info
    logger.info(`๐Ÿ” Fetching experiment info...`);
    const experiment = await (0, getExperimentInfo_js_1.getExperimentInfo)({ client, experimentId });
    // Check if there are incomplete runs
    const totalExpected = experiment.exampleCount * experiment.repetitions;
    const incompleteCount = totalExpected - experiment.successfulRunCount;
    if (incompleteCount === 0) {
        logger.info("โœ… No incomplete runs found. Experiment is already complete.");
        return;
    }
    logger.info(`๐Ÿงช Resuming experiment with ${incompleteCount} incomplete runs...`);
    // Get base URL for tracing and URL generation
    const baseUrl = client.config.baseUrl;
    (0, tiny_invariant_1.default)(baseUrl, "Phoenix base URL not found. Please set PHOENIX_HOST or set baseUrl on the client.");
    // Initialize tracer (only if experiment has a project_name)
    const tracerSetup = setupTracer({
        projectName: experiment.projectName,
        baseUrl,
        headers: client.config.headers ? (0, toObjectHeaders_1.toObjectHeaders)(client.config.headers) : undefined,
        useBatchSpanProcessor,
        diagLogLevel,
        setGlobalTracerProvider,
    });
    const provider = (_a = tracerSetup === null || tracerSetup === void 0 ? void 0 : tracerSetup.provider) !== null && _a !== void 0 ? _a : null;
    const taskTracer = (_b = tracerSetup === null || tracerSetup === void 0 ? void 0 : tracerSetup.tracer) !== null && _b !== void 0 ? _b : null;
    // Display URLs
    const datasetExperimentsUrl = (0, urlUtils_1.getDatasetExperimentsUrl)({
        baseUrl,
        datasetId: experiment.datasetId,
    });
    const experimentUrl = (0, urlUtils_1.getExperimentUrl)({
        baseUrl,
        datasetId: experiment.datasetId,
        experimentId: experiment.id,
    });
    logger.info(`๐Ÿ“บ View dataset experiments: ${datasetExperimentsUrl}`);
    logger.info(`๐Ÿ”— View this experiment: ${experimentUrl}`);
    // Create a CSP-style bounded buffer for task distribution
    const taskChannel = new channel_1.Channel(pageSize * CHANNEL_CAPACITY_MULTIPLIER);
    // Abort controller for stopOnFirstError coordination
    const abortController = new AbortController();
    const { signal } = abortController;
    let totalProcessed = 0;
    let totalCompleted = 0;
    let totalFailed = 0;
    // Producer: Fetch incomplete runs and send to channel
    async function fetchIncompleteRuns() {
        var _a, _b, _c;
        let cursor = null;
        try {
            do {
                // Stop fetching if abort signal received
                if (signal.aborted) {
                    logger.info("๐Ÿ›‘ Stopping fetch due to error in task");
                    break;
                }
                let res;
                try {
                    res = await client.GET("/v1/experiments/{experiment_id}/incomplete-runs", {
                        params: {
                            path: {
                                experiment_id: experimentId,
                            },
                            query: {
                                cursor,
                                limit: pageSize,
                            },
                        },
                    });
                }
                catch (error) {
                    // Check for version compatibility issues and throw helpful error
                    try {
                        await handleFetchError(error, client, "resume_experiment");
                        // TypeScript: handleFetchError never returns, but add throw for safety
                        throw new Error("handleFetchError should never return");
                    }
                    catch (handledError) {
                        // Wrap the error (from handleFetchError or original) in semantic error type
                        throw new TaskFetchError("Failed to fetch incomplete runs from server", handledError instanceof Error ? handledError : undefined);
                    }
                }
                cursor = (_b = (_a = res.data) === null || _a === void 0 ? void 0 : _a.next_cursor) !== null && _b !== void 0 ? _b : null;
                const batchIncomplete = (_c = res.data) === null || _c === void 0 ? void 0 : _c.data;
                (0, tiny_invariant_1.default)(batchIncomplete, "Failed to fetch incomplete runs");
                if (batchIncomplete.length === 0) {
                    break;
                }
                // Send tasks to channel (blocks if channel is full - natural backpressure!)
                let batchCount = 0;
                for (const incomplete of batchIncomplete) {
                    // Stop sending items if abort signal received
                    if (signal.aborted) {
                        break;
                    }
                    const example = buildExampleFromApiResponse(incomplete.dataset_example);
                    for (const repNum of incomplete.repetition_numbers) {
                        // Stop sending items if abort signal received
                        if (signal.aborted) {
                            break;
                        }
                        await taskChannel.send({ example, repetitionNumber: repNum });
                        batchCount++;
                        totalProcessed++;
                    }
                }
                logger.info(`Fetched batch of ${batchCount} incomplete runs (channel buffer: ${taskChannel.length})`);
            } while (cursor !== null && !signal.aborted);
        }
        catch (error) {
            // Re-throw with context preservation
            if (error instanceof TaskFetchError) {
                throw error;
            }
            // ChannelError from blocked send() should bubble up naturally
            // (happens when channel closes while producer is blocked)
            if (error instanceof channel_1.ChannelError) {
                throw error;
            }
            // Wrap any unexpected errors from channel operations
            throw new TaskFetchError("Unexpected error during task fetch", error instanceof Error ? error : undefined);
        }
        finally {
            taskChannel.close(); // Signal workers we're done
        }
    }
    // Worker: Process tasks from channel
    async function processTasksFromChannel() {
        var _a, e_1, _b, _c;
        try {
            // Downleveled `for await (const item of taskChannel)` loop
            for (var _d = true, taskChannel_1 = __asyncValues(taskChannel), taskChannel_1_1; taskChannel_1_1 = await taskChannel_1.next(), _a = taskChannel_1_1.done, !_a; _d = true) {
                _c = taskChannel_1_1.value;
                _d = false;
                const item = _c;
                // Stop processing if abort signal received
                if (signal.aborted) {
                    break;
                }
                try {
                    await runSingleTask({
                        client,
                        experimentId,
                        task,
                        example: item.example,
                        repetitionNumber: item.repetitionNumber,
                        tracer: taskTracer,
                    });
                    totalCompleted++;
                }
                catch (error) {
                    totalFailed++;
                    logger.error(`Failed to run task for example ${item.example.id}, repetition ${item.repetitionNumber}: ${error}`);
                    // If stopOnFirstError is enabled, abort and re-throw
                    if (stopOnFirstError) {
                        logger.error("๐Ÿ›‘ Stopping on first error");
                        abortController.abort();
                        throw error;
                    }
                }
            }
        }
        catch (e_1_1) { e_1 = { error: e_1_1 }; }
        finally {
            try {
                if (!_d && !_a && (_b = taskChannel_1.return)) await _b.call(taskChannel_1);
            }
            finally { if (e_1) throw e_1.error; }
        }
    }
    // Start concurrent execution
    // Wrap in try-finally to ensure channel is always closed, even if Promise.all throws
    let executionError = null;
    try {
        const producerTask = fetchIncompleteRuns();
        const workerTasks = Array.from({ length: concurrency }, () => processTasksFromChannel());
        // Wait for producer and all workers to finish
        await Promise.all([producerTask, ...workerTasks]);
    }
    catch (error) {
        // Classify and handle errors based on their nature
        const err = error instanceof Error ? error : new Error(String(error));
        // Always surface producer/infrastructure errors
        if (error instanceof TaskFetchError) {
            // Producer failed - this is ALWAYS critical regardless of stopOnFirstError
            logger.error(`โŒ Critical: Failed to fetch incomplete runs from server`);
            executionError = err;
        }
        else if (error instanceof channel_1.ChannelError && signal.aborted) {
            // Channel closed due to intentional abort - wrap in semantic error
            executionError = new TaskAbortedError("Task execution stopped due to error in concurrent worker", err);
        }
        else if (stopOnFirstError) {
            // Worker error in stopOnFirstError mode - already logged by worker
            executionError = err;
        }
        else {
            // Unexpected error (not from worker, not from producer fetch)
            // This could be a bug in our code or infrastructure failure
            logger.error(`โŒ Unexpected error during task execution: ${err.message}`);
            executionError = err;
        }
    }
    finally {
        // Ensure channel is closed even if there are unexpected errors
        // This is a safety net in case producer's finally block didn't execute
        if (!taskChannel.isClosed) {
            taskChannel.close();
        }
    }
    // Only show completion message if we didn't stop on error
    if (!executionError) {
        logger.info(`โœ… Task runs completed.`);
    }
    if (totalFailed > 0 && !executionError) {
        logger.info(`โš ๏ธ Warning: ${totalFailed} out of ${totalProcessed} runs failed.`);
    }
    // Run evaluators if provided (only on runs missing evaluations)
    // Skip evaluators if we stopped on error
    if (evaluators && evaluators.length > 0 && !executionError) {
        logger.info(`\n๐Ÿ”ฌ Running evaluators...`);
        await (0, resumeEvaluation_1.resumeEvaluation)({
            experimentId,
            evaluators: [...evaluators],
            client,
            logger,
            concurrency,
            setGlobalTracerProvider,
            useBatchSpanProcessor,
            diagLogLevel,
            stopOnFirstError,
        });
    }
    // Print summary
    printExperimentSummary({
        logger,
        experimentId: experiment.id,
        totalProcessed,
        totalCompleted,
    });
    // Flush spans (if tracer was initialized)
    if (provider) {
        await provider.forceFlush();
    }
    // Re-throw error if stopOnFirstError was triggered
    if (executionError) {
        throw executionError;
    }
}
/**
 * Record task result to API (without executing the task).
 */
async function recordTaskResult({ client, experimentId, example, repetitionNumber, output, error, startTime, endTime, traceId = null, }) {
    try {
        await client.POST("/v1/experiments/{experiment_id}/runs", {
            params: {
                path: {
                    experiment_id: experimentId,
                },
            },
            body: {
                dataset_example_id: example.id,
                repetition_number: repetitionNumber,
                output: output,
                start_time: startTime.toISOString(),
                end_time: endTime.toISOString(),
                error: error ? (0, ensureString_1.ensureString)(error) : undefined,
                trace_id: traceId,
            },
        });
    }
    catch (err) {
        // Ignore 409 Conflict - result already exists (idempotency)
        if ((0, isHttpError_1.isHttpErrorWithStatus)(err, 409)) {
            return;
        }
        throw err; // Re-throw other errors
    }
}
/**
 * Run a single task and record the result with optional tracing.
 */
async function runSingleTask({ client, experimentId, task, example, repetitionNumber, tracer, }) {
    const startTime = new Date();
    // If no tracer (no project_name), execute without tracing
    if (!tracer) {
        let output = null; // Initialize to null for failed tasks
        let error;
        try {
            output = await Promise.resolve(task(example));
        }
        catch (err) {
            error = err instanceof Error ? err.message : String(err);
            throw err;
        }
        finally {
            const endTime = new Date();
            await recordTaskResult({
                client,
                experimentId,
                example,
                repetitionNumber,
                output,
                error,
                startTime,
                endTime,
            });
        }
        return;
    }
    // With tracer: wrap execution in a span for observability
    return tracer.startActiveSpan(`Task: ${task.name || "anonymous"}`, async (span) => {
        // Set span attributes
        span.setAttributes(Object.assign({ [openinference_semantic_conventions_1.SemanticConventions.OPENINFERENCE_SPAN_KIND]: openinference_semantic_conventions_1.OpenInferenceSpanKind.CHAIN, [openinference_semantic_conventions_1.SemanticConventions.INPUT_VALUE]: (0, ensureString_1.ensureString)(example.input), [openinference_semantic_conventions_1.SemanticConventions.INPUT_MIME_TYPE]: openinference_semantic_conventions_1.MimeType.JSON }, (0, phoenix_otel_1.objectAsAttributes)({
            experiment_id: experimentId,
            dataset_example_id: example.id,
            repetition_number: repetitionNumber,
        })));
        let output = null; // Initialize to null for failed tasks
        let error;
        try {
            // Execute the task (only once!)
            output = await Promise.resolve(task(example));
            // Set output attributes
            span.setAttributes({
                [openinference_semantic_conventions_1.SemanticConventions.OUTPUT_VALUE]: (0, ensureString_1.ensureString)(output),
                [openinference_semantic_conventions_1.SemanticConventions.OUTPUT_MIME_TYPE]: openinference_semantic_conventions_1.MimeType.JSON,
            });
            span.setStatus({ code: phoenix_otel_1.SpanStatusCode.OK });
        }
        catch (err) {
            error = err instanceof Error ? err.message : String(err);
            span.setStatus({
                code: phoenix_otel_1.SpanStatusCode.ERROR,
                message: error,
            });
            span.recordException(err);
            throw err;
        }
        finally {
            const endTime = new Date();
            span.end();
            // Record result to API
            await recordTaskResult({
                client,
                experimentId,
                example,
                repetitionNumber,
                output,
                error,
                startTime,
                endTime,
                traceId: span.spanContext().traceId,
            });
        }
    });
}
//# sourceMappingURL=resumeExperiment.js.map