judgeval
Judgment SDK for TypeScript/JavaScript
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
__setModuleDefault(result, mod);
return result;
};
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.JudgmentClient = void 0;
const dotenv = __importStar(require("dotenv"));
const axios_1 = __importDefault(require("axios"));
const example_js_1 = require("./data/example.js");
const result_js_1 = require("./data/result.js");
const base_scorer_js_1 = require("./scorers/base-scorer.js");
const evaluation_run_js_1 = require("./evaluation-run.js");
const rules_js_1 = require("./rules.js");
const run_evaluation_js_1 = require("./run-evaluation.js");
const constants_js_1 = require("./constants.js");
const logger_instance_js_1 = __importDefault(require("./common/logger-instance.js"));
// Progress bar dependencies (used by waitForEvaluation)
const cli_progress_1 = __importDefault(require("cli-progress"));
const ansi_colors_1 = __importDefault(require("ansi-colors"));
const eval_dataset_client_js_1 = require("./data/datasets/eval-dataset-client.js");
// Load environment variables
dotenv.config();
/**
* Singleton implementation for JudgmentClient
*/
class JudgmentClient {
/**
* Get the singleton instance of JudgmentClient
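*
* Illustrative sketch (not from the source): assumes JUDGMENT_API_KEY and
* JUDGMENT_ORG_ID are set in the environment.
* @example
* // The first call creates the instance; later calls return the same object.
* const client = JudgmentClient.getInstance();
* console.log(client === JudgmentClient.getInstance()); // true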
*/
static getInstance(judgmentApiKey, organizationId) {
if (!JudgmentClient.instance) {
JudgmentClient.instance = new JudgmentClient(judgmentApiKey, organizationId);
}
return JudgmentClient.instance;
}
/**
* Constructor for JudgmentClient
* @param judgmentApiKey The Judgment API key
* @param organizationId The organization ID
*/
constructor(judgmentApiKey, organizationId) {
this.judgmentApiKey = judgmentApiKey || process.env.JUDGMENT_API_KEY || '';
this.organizationId = organizationId || process.env.JUDGMENT_ORG_ID || '';
if (!this.judgmentApiKey) {
// Use logger for internal error, but throw for user
logger_instance_js_1.default.error('JUDGMENT_API_KEY is not set.');
throw new Error('Judgment API key is required. Set it in the constructor or as the JUDGMENT_API_KEY environment variable.');
}
if (!this.organizationId) {
throw new Error('Organization ID is required. Set it in the constructor or as the JUDGMENT_ORG_ID environment variable.');
}
// Keep this as direct output; log success only after both credentials are present
console.log('Successfully initialized JudgmentClient!');
}
/**
* Run an evaluation asynchronously
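*
* Illustrative sketch (not from the source): `examples` and `scorers` are
* assumed to be pre-built Example and scorer instances.
* @example
* await client.aRunEvaluation(examples, scorers,
*   'meta-llama/Meta-Llama-3-8B-Instruct-Turbo', undefined, {},
*   true, 'my_project', 'nightly-run');
* // Poll until the async run completes, then collect parsed results.
* const results = await client.waitForEvaluation('my_project', 'nightly-run');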
*/
aRunEvaluation(examples_1, scorers_1, model_1, aggregator_1, metadata_1) {
return __awaiter(this, arguments, void 0, function* (examples, scorers, model, aggregator, metadata, logResults = true, projectName = 'default_project', evalRunName = 'default_eval_run', override = false, useJudgment = true, ignoreErrors = true, rules) {
// Simply call runEvaluation with asyncExecution=true
return this.runEvaluation(examples, scorers, model, aggregator, metadata, logResults, projectName, evalRunName, override, useJudgment, ignoreErrors, true, // Set asyncExecution to true
rules);
});
}
/**
* Run an evaluation
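*
* Illustrative sketch (not from the source); prefer `evaluate()` for the
* named-options form. `examples` and `scorers` are assumed pre-built.
* @example
* const results = await client.runEvaluation(
*   examples, scorers,
*   'meta-llama/Meta-Llama-3-8B-Instruct-Turbo', // model
*   undefined, {},                               // aggregator, metadata
*   true, 'my_project', 'run-001');              // logResults, projectName, evalRunName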
*/
runEvaluation(examples_1, scorers_1, model_1, aggregator_1, metadata_1) {
return __awaiter(this, arguments, void 0, function* (examples, scorers, model, aggregator, metadata, logResults = true, projectName = 'default_project', evalRunName = 'default_eval_run', override = false, useJudgment = true, ignoreErrors = true, asyncExecution = false, rules) {
try {
// Load appropriate implementations for all scorers
const loadedScorers = [];
for (const scorer of scorers) {
try {
if (scorer instanceof base_scorer_js_1.ScorerWrapper) {
loadedScorers.push(scorer.loadImplementation(useJudgment));
}
else {
// Assume scorers passed are already JudgevalScorer or APIJudgmentScorer
loadedScorers.push(scorer);
}
}
catch (error) {
throw new Error(`Failed to load implementation for scorer ${scorer.constructor.name}: ${error instanceof Error ? error.message : String(error)}`);
}
}
// Prevent using JudgevalScorer with rules - only APIJudgmentScorer allowed with rules
if (rules && loadedScorers.some(scorer => scorer instanceof base_scorer_js_1.JudgevalScorer)) {
throw new Error('Cannot use JudgevalScorer instances when rules are provided; rules support only API scorers. Remove the rules or use only APIJudgmentScorer types.');
}
// Convert ScorerWrapper in rules to their implementations
let loadedRules;
if (rules) {
loadedRules = [];
for (const rule of rules) {
try {
const processedConditions = [];
for (const condition of rule.conditions) {
// Convert metric if it's a ScorerWrapper
if (condition.metric instanceof base_scorer_js_1.ScorerWrapper) {
try {
// Create a new Condition object with the loaded implementation
const loadedMetric = condition.metric.loadImplementation(useJudgment);
const newCondition = new rules_js_1.Condition(loadedMetric);
// Copy the remaining condition properties (e.g., threshold), overriding metric with the loaded implementation
Object.assign(newCondition, Object.assign(Object.assign({}, condition), { metric: loadedMetric }));
processedConditions.push(newCondition);
}
catch (error) {
throw new Error(`Failed to convert ScorerWrapper to implementation in rule '${rule.name}', condition metric '${condition.metric.constructor.name}': ${error instanceof Error ? error.message : String(error)}`);
}
}
else {
processedConditions.push(condition);
}
}
// Create new rule with processed conditions
const newRule = new rules_js_1.Rule(rule.name, processedConditions, rule.combine_type, rule.description, rule.notification, rule.ruleId);
loadedRules.push(newRule);
}
catch (error) {
throw new Error(`Failed to process rule '${rule.name}': ${error instanceof Error ? error.message : String(error)}`);
}
}
}
const evaluationRun = new evaluation_run_js_1.EvaluationRun({
logResults,
projectName,
evalName: evalRunName,
examples,
scorers: loadedScorers,
model,
aggregator,
metadata,
judgmentApiKey: this.judgmentApiKey,
rules: loadedRules,
organizationId: this.organizationId
});
return (0, run_evaluation_js_1.runEval)(evaluationRun, override, ignoreErrors, asyncExecution);
}
catch (error) {
if (error instanceof Error) {
if (error.message.includes('one or more fields are invalid')) {
throw new Error(`Please check your EvaluationRun object, one or more fields are invalid: \n${error.message}`);
}
else {
throw new Error(`An unexpected error occurred during evaluation: ${error.message}`);
}
}
else {
throw new Error(`An unexpected error occurred during evaluation: ${String(error)}`);
}
}
});
}
/**
* Run an evaluation with a simplified interface (recommended)
* @param config Configuration object for the evaluation
* @returns Promise<ScoringResult[]> The evaluation results
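*
* Illustrative sketch (not from the source): uses the Example class from
* ./data/example.js; `myScorer` stands in for any loaded scorer instance.
* @example
* const results = await client.evaluate({
*   examples: [new Example({ input: 'What is 2 + 2?', actualOutput: '4' })],
*   scorers: [myScorer],
*   projectName: 'my_project',
*   evalName: 'smoke-test',
* });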
*/
evaluate(config) {
return __awaiter(this, void 0, void 0, function* () {
// Set default values
const { examples, scorers, model = 'meta-llama/Meta-Llama-3-8B-Instruct-Turbo', aggregator = undefined, metadata = {}, projectName = 'default_project', evalName = `eval-run-${Date.now()}`, logResults = true, useJudgment = true, ignoreErrors = true, asyncExecution = false, rules = undefined, override = false } = config;
// Call the original runEvaluation method with the extracted parameters
return this.runEvaluation(examples, scorers, model, aggregator, metadata, logResults, projectName, evalName, override, useJudgment, ignoreErrors, asyncExecution, rules);
});
}
/**
* Evaluate a dataset
*/
evaluateDataset(dataset_1, scorers_1, model_1, aggregator_1, metadata_1) {
return __awaiter(this, arguments, void 0, function* (dataset, // Keep types loose for this stub
scorers, model, aggregator, metadata, projectName = 'default_project', evalRunName = 'default_eval_run', logResults = true, useJudgment = true, rules) {
throw new Error('Not implemented in JudgmentClient. Use EvalDatasetClient.');
});
}
/**
* Pull evaluation results from the server
*
* @param projectName Name of the project
* @param evalRunName Name of the evaluation run
* @returns Array of evaluation result objects with the same format as the Python SDK
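*
* Illustrative sketch (not from the source).
* @example
* const raw = await client.pullEval('my_project', 'run-001');
* // `raw` mirrors the server payload; use pullEvalResults() for parsed ScoringResult objects.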
*/
pullEval(projectName, evalRunName) {
return __awaiter(this, void 0, void 0, function* () {
const evalRunRequestBody = {
project_name: projectName,
eval_name: evalRunName,
judgment_api_key: this.judgmentApiKey
};
try {
logger_instance_js_1.default.info(`Pulling evaluation results for project '${projectName}', run '${evalRunName}'`);
const response = yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_FETCH_API_URL, evalRunRequestBody, {
headers: {
"Content-Type": "application/json",
"Authorization": `Bearer ${this.judgmentApiKey}`,
"X-Organization-Id": this.organizationId
}
});
// Ensure we return the data in the exact same format as the Python SDK
return response.data;
}
catch (error) {
if (axios_1.default.isAxiosError(error) && error.response) {
const errorMessage = `Error fetching eval results: ${JSON.stringify(error.response.data)}`;
logger_instance_js_1.default.error(errorMessage);
throw new Error(errorMessage);
}
else {
const errorMessage = `Unknown error during pullEval: ${error}`;
logger_instance_js_1.default.error(errorMessage);
throw error;
}
}
});
}
/**
* Retrieves evaluation results with retry mechanism
* @param projectName Name of the project
* @param evalRunName Name of the evaluation run
* @param options Configuration options for retries
* @returns The evaluation results; throws if all retry attempts fail
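*
* Illustrative sketch (not from the source); option values are examples only.
* @example
* // Up to 5 attempts; waits 2s, 4s, 8s (capped at 10s) between retries.
* const results = await client.pullEvalWithRetry('my_project', 'run-001', {
*   maxRetries: 5, initialDelayMs: 1000, maxDelayMs: 10000, backoffFactor: 2,
* });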
*/
pullEvalWithRetry(projectName_1, evalRunName_1) {
return __awaiter(this, arguments, void 0, function* (projectName, evalRunName, options = {}) {
// Default options
const maxRetries = options.maxRetries || 3;
const initialDelayMs = options.initialDelayMs || 2000;
const maxDelayMs = options.maxDelayMs || 30000;
const backoffFactor = options.backoffFactor || 2;
let lastError = null;
for (let attempt = 0; attempt < maxRetries; attempt++) {
try {
// Calculate delay with exponential backoff, capped at maxDelayMs
const delayMs = Math.min(initialDelayMs * Math.pow(backoffFactor, attempt), maxDelayMs);
if (attempt > 0) {
logger_instance_js_1.default.info(`Retry attempt ${attempt + 1}/${maxRetries} for pullEval after ${delayMs}ms delay`);
yield new Promise(resolve => setTimeout(resolve, delayMs));
}
const results = yield this.pullEval(projectName, evalRunName);
return results;
}
catch (error) {
lastError = error;
// Check if we should retry based on error type
if (axios_1.default.isAxiosError(error) && error.response) {
const status = error.response.status;
// Don't retry for client errors (except 429 Too Many Requests)
if (status >= 400 && status < 500 && status !== 429) {
logger_instance_js_1.default.error(`Not retrying due to client error: ${status}`);
throw error;
}
}
logger_instance_js_1.default.warn(`Attempt ${attempt + 1} failed, ${attempt < maxRetries - 1 ? 'will retry' : 'giving up'}`);
}
}
// If we get here, all retries failed
logger_instance_js_1.default.error(`All ${maxRetries} retry attempts failed for pullEval`);
throw lastError || new Error('Failed to retrieve evaluation results after all retry attempts');
});
}
/**
* Export evaluation results to a file format
* @param projectName Name of the project
* @param evalRunName Name of the evaluation run
* @param format Export format ('json' or 'csv')
* @returns The exported data as a string
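*
* Illustrative sketch (not from the source): assumes Node's fs/promises for
* writing the exported string to disk.
* @example
* const { writeFile } = require('fs/promises');
* const csv = await client.exportEvalResults('my_project', 'run-001', 'csv');
* await writeFile('results.csv', csv);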
*/
exportEvalResults(projectName_1, evalRunName_1) {
return __awaiter(this, arguments, void 0, function* (projectName, evalRunName, format = 'json') {
logger_instance_js_1.default.info(`Exporting eval results for project '${projectName}', run '${evalRunName}' as ${format}`);
try {
const resultsData = yield this.pullEval(projectName, evalRunName);
if (!resultsData || resultsData.length === 0 || !resultsData[0].results) {
logger_instance_js_1.default.warn('No results found to export.');
return '';
}
const results = resultsData[0].results;
if (format === 'json') {
// Pretty print JSON
return JSON.stringify(results.map(r => r.toJSON()), null, 2);
}
else if (format === 'csv') {
if (results.length === 0)
return ''; // No data to export
// Dynamically determine headers from the first result object
// Flatten the structure for CSV
const flatResults = results.map(result => {
var _a, _b, _c;
const flat = {};
const exampleData = (_b = (_a = result.dataObject) === null || _a === void 0 ? void 0 : _a.toJSON()) !== null && _b !== void 0 ? _b : {}; // Use toJSON which gives snake_case
const scorersData = (_c = result.scorersData) !== null && _c !== void 0 ? _c : [];
// Add example data fields (snake_case)
for (const key in exampleData) {
// Prefix example fields to avoid collision, e.g., example_input
flat[`example_${key}`] = exampleData[key];
}
// Add scorers data
scorersData.forEach(scorer => {
flat[`scorer_${scorer.name}_score`] = scorer.score;
flat[`scorer_${scorer.name}_additional_metadata`] = JSON.stringify(scorer.additional_metadata);
flat[`scorer_${scorer.name}_error`] = scorer.error;
});
// Add top-level error if present
flat['top_level_error'] = result.error;
return flat;
});
// Get all unique keys from the flattened results for headers
const headers = Array.from(new Set(flatResults.flatMap(Object.keys)));
// Use papaparse for robust CSV generation
const Papa = require('papaparse'); // Lazily require papaparse so it is only loaded when CSV export is requested
const csv = Papa.unparse({
fields: headers,
data: flatResults
}, {
header: true,
quotes: true, // Ensure fields with commas/newlines are quoted
quoteChar: '"',
escapeChar: '"',
delimiter: ','
});
return csv;
}
else {
throw new Error(`Unsupported export format: ${format}`);
}
}
catch (error) {
logger_instance_js_1.default.error(`Error exporting eval results: ${error}`);
this.handleApiError(error, 'exportEvalResults');
throw error;
}
});
}
/**
* Delete an evaluation from the server
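*
* Illustrative sketch (not from the source); resolves to false on failure.
* @example
* const ok = await client.deleteEval('my_project', ['run-001', 'run-002']);
* if (!ok) console.error('Delete failed; see logs for details.');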
*/
deleteEval(projectName, evalRunNames) {
return __awaiter(this, void 0, void 0, function* () {
logger_instance_js_1.default.info(`Deleting eval runs: ${evalRunNames.join(', ')} from project: ${projectName}`);
const requestBody = {
project_name: projectName,
eval_names: evalRunNames,
judgment_api_key: this.judgmentApiKey,
};
try {
yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_DELETE_API_URL, requestBody, {
headers: this.getAuthHeaders()
});
logger_instance_js_1.default.info('Successfully deleted eval runs.');
return true;
}
catch (error) {
logger_instance_js_1.default.error(`Error deleting eval runs: ${error}`);
this.handleApiError(error, 'deleteEval');
return false;
}
});
}
/**
* Delete all evaluations from the server for a given project
*/
deleteProjectEvals(projectName) {
return __awaiter(this, void 0, void 0, function* () {
logger_instance_js_1.default.info(`Deleting ALL eval runs for project: ${projectName}`);
const requestBody = {
project_name: projectName,
judgment_api_key: this.judgmentApiKey,
};
try {
yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_DELETE_PROJECT_API_URL, requestBody, {
headers: this.getAuthHeaders()
});
logger_instance_js_1.default.info(`Successfully deleted all eval runs for project ${projectName}.`);
return true;
}
catch (error) {
logger_instance_js_1.default.error(`Error deleting project evals: ${error}`);
this.handleApiError(error, 'deleteProjectEvals');
return false;
}
});
}
/**
* Create a project on the server
*/
createProject(projectName) {
return __awaiter(this, void 0, void 0, function* () {
logger_instance_js_1.default.info(`Creating project: ${projectName}`);
const requestBody = {
project_name: projectName,
judgment_api_key: this.judgmentApiKey,
};
try {
logger_instance_js_1.default.info(`Creating project: ${projectName}`);
const response = yield axios_1.default.post(constants_js_1.JUDGMENT_PROJECT_CREATE_API_URL, requestBody, {
headers: this.getAuthHeaders()
});
// Check for specific success message or status if API provides one
if (response.data && response.data.message === 'Project added successfully') {
logger_instance_js_1.default.info(`Successfully created project: ${projectName}`);
return true;
}
else if (response.data && response.data.message === 'Project already exists') {
logger_instance_js_1.default.warn(`Project '${projectName}' already exists.`);
return true; // Or false, depending on desired behavior for existing projects
}
else {
logger_instance_js_1.default.error(`Failed to create project '${projectName}'. Response: ${JSON.stringify(response.data)}`);
return false;
}
}
catch (error) {
logger_instance_js_1.default.error(`Error creating project: ${error}`);
this.handleApiError(error, 'createProject');
return false;
}
});
}
/**
* Delete a project from the server
*/
deleteProject(projectName) {
return __awaiter(this, void 0, void 0, function* () {
logger_instance_js_1.default.info(`Deleting project: ${projectName}`);
const requestBody = {
project_name: projectName,
judgment_api_key: this.judgmentApiKey,
};
try {
const response = yield axios_1.default.post(constants_js_1.JUDGMENT_PROJECT_DELETE_API_URL, requestBody, {
headers: this.getAuthHeaders()
});
if (response.data && response.data.message === 'Project deleted successfully') {
logger_instance_js_1.default.info(`Successfully deleted project: ${projectName}`);
return true;
}
else {
logger_instance_js_1.default.error(`Failed to delete project '${projectName}'. Response: ${JSON.stringify(response.data)}`);
return false;
}
}
catch (error) {
logger_instance_js_1.default.error(`Error deleting project: ${error}`);
this.handleApiError(error, 'deleteProject');
return false;
}
});
}
/**
* Validate that the user API key is valid
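*
* Resolves to a [valid, message] tuple. Illustrative sketch (not from the source).
* @example
* const [valid, message] = await client.validateApiKey();
* if (!valid) throw new Error(message);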
*/
validateApiKey() {
return __awaiter(this, void 0, void 0, function* () {
var _a, _b, _c, _d;
logger_instance_js_1.default.debug('Validating API Key...');
try {
// Instantiate EvalDatasetClient to perform the validation call
const datasetClient = new eval_dataset_client_js_1.EvalDatasetClient(this.judgmentApiKey, this.organizationId);
// Use the dataset client to make the call
yield datasetClient.pullProjectDatasetStats('__api_key_validation__');
logger_instance_js_1.default.debug('API Key appears valid.');
return [true, 'API Key is valid.'];
}
catch (error) {
let message = 'API Key validation failed.';
if (axios_1.default.isAxiosError(error)) {
if (((_a = error.response) === null || _a === void 0 ? void 0 : _a.status) === 401 || ((_b = error.response) === null || _b === void 0 ? void 0 : _b.status) === 403) {
message = 'API Key is invalid or expired.';
}
else if (((_c = error.response) === null || _c === void 0 ? void 0 : _c.status) === 404) {
// If validation endpoint returns 404, key might be valid but endpoint wrong/project not found
// This depends on the specific validation endpoint behavior
message = 'API Key might be valid, but validation endpoint returned 404.';
}
else {
message = `API Key validation failed with status ${(_d = error.response) === null || _d === void 0 ? void 0 : _d.status}: ${error.message}`;
}
}
else {
message = `API Key validation failed: ${String(error)}`;
}
logger_instance_js_1.default.error(message);
return [false, message];
}
});
}
/**
* Assert a test by running the evaluation and checking the results for success
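*
* Illustrative sketch (not from the source): `examples` and `scorers` are
* assumed pre-built; the call is expected to throw when any scorer fails.
* @example
* await client.assertTest(examples, scorers,
*   'meta-llama/Meta-Llama-3-8B-Instruct-Turbo', undefined, {},
*   true, 'my_project', 'ci-gate');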
*/
assertTest(examples_1, scorers_1, model_1, aggregator_1, metadata_1) {
return __awaiter(this, arguments, void 0, function* (examples, scorers, // Type matches Python's intent
model, aggregator, metadata, logResults = true, projectName = 'default_project', evalRunName = 'default_eval_run', override = false, rules) {
const results = yield this.runEvaluation(examples, scorers, model, aggregator, metadata, logResults, projectName, evalRunName, override, true, // useJudgment = true (necessary if API scorers or rules are involved)
false, // ignoreErrors = false for assert
false, // asyncExecution = false
rules);
(0, run_evaluation_js_1.assertTest)(results); // Assumes assertTest handles ScoringResult[]
});
}
/**
* Pull the results of an evaluation run. Matches `pullEval` logic but returns only the ScoringResult array.
* @param projectName The name of the project
* @param evalRunName The name of the evaluation run
* @returns Array of ScoringResult objects
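*
* Illustrative sketch (not from the source).
* @example
* const results = await client.pullEvalResults('my_project', 'run-001');
* for (const r of results) console.log(r.dataObject.input, r.scorersData);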
*/
pullEvalResults(projectName, evalRunName) {
return __awaiter(this, void 0, void 0, function* () {
var _a;
// Get the raw API response
const rawResults = yield this.pullEval(projectName, evalRunName);
// Ensure proper handling of empty results
if (!rawResults || !Array.isArray(rawResults) || rawResults.length === 0) {
return [];
}
// Process the results to match Python SDK format
const scoringResults = [];
for (const item of rawResults) {
if (item.result && item.result.scorers_data && Array.isArray(item.result.scorers_data)) {
// Extract example data if available
const exampleData = item.examples && item.examples.length > 0 ? item.examples[0] : null;
// Create an Example object with the data from the API
const example = new example_js_1.Example({
input: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.input) || '',
actualOutput: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.actual_output) || '',
expectedOutput: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.expected_output) || '',
context: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.context) || null,
retrievalContext: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.retrieval_context) || null,
additionalMetadata: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.additional_metadata) || {},
toolsCalled: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.tools_called) || null,
expectedTools: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.expected_tools) || null,
exampleId: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.example_id) || null,
name: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.name) || 'example',
exampleIndex: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.example_index) || 0,
timestamp: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.created_at) || new Date().toISOString(),
traceId: ((_a = item.result) === null || _a === void 0 ? void 0 : _a.trace_id) || null
});
// Create a ScoringResult using the builder pattern
const scoringResult = result_js_1.ScoringResult.builder()
.dataObject(example)
.scorersData(item.result.scorers_data)
.build();
scoringResults.push(scoringResult);
}
}
return scoringResults;
});
}
/**
* Check the status of an evaluation run using the fetch endpoint.
* This is a heuristic approach as the endpoint might return full results or status info.
* @param projectName The name of the project
* @param evalRunName The name of the evaluation run
* @returns An object representing the status { status: string, progress: number, message: string }
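*
* Illustrative sketch (not from the source).
* @example
* const { status, progress, message } = await client.checkEvalStatus('my_project', 'run-001');
* console.log(`${status} (${progress}%): ${message}`);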
*/
checkEvalStatus(projectName, evalRunName) {
return __awaiter(this, void 0, void 0, function* () {
var _a;
const requestBody = {
project_name: projectName,
eval_name: evalRunName,
judgment_api_key: this.judgmentApiKey,
};
try {
const response = yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_FETCH_API_URL, requestBody, {
headers: this.getAuthHeaders(),
// Add a shorter timeout for status checks?
// timeout: 5000
});
const data = response.data;
// Check if the response looks like a status object
if (data && typeof data.status === 'string') {
return {
status: data.status || 'unknown',
progress: typeof data.progress === 'number' ? data.progress : 0,
message: data.message || '',
error: data.error
};
}
// Check if the response looks like completed results (array format from pullEval)
else if (Array.isArray(data) && data.length > 0 && data[0].results) {
return {
status: 'completed',
progress: 100,
message: 'Evaluation completed.'
};
}
// Check if response looks like completed results (single object format)
else if (data && typeof data.id === 'string' && Array.isArray(data.results)) { // Adjust based on actual API response for single result fetch
return {
status: 'completed',
progress: 100,
message: 'Evaluation completed.'
};
}
// Handle other potential responses or assume pending/unknown
else {
logger_instance_js_1.default.warn(`Unexpected response format when checking status for ${evalRunName}:`, data);
return {
status: 'unknown',
progress: 0,
message: 'Could not determine status from API response.'
};
}
}
catch (error) {
// Don't throw here, return status indicating error
let errorMessage = 'Failed to fetch evaluation status.';
let status = 'error';
if (axios_1.default.isAxiosError(error) && ((_a = error.response) === null || _a === void 0 ? void 0 : _a.status) === 404) {
status = 'not_found';
errorMessage = 'Evaluation run not found.';
logger_instance_js_1.default.warn(`Evaluation run ${evalRunName} not found.`);
}
else {
this.handleApiError(error, 'checkEvalStatus');
errorMessage = `Error fetching status: ${String(error)}`;
}
return {
status: status,
progress: 0,
message: errorMessage,
error: String(error) // Include error string
};
}
});
}
/**
* Wait for an async evaluation to complete and return the results
* @param projectName The name of the project
* @param evalRunName The name of the evaluation run
* @param options Optional configuration for polling: intervalMs, maxAttempts, showProgress
* @returns The evaluation results as ScoringResult[]; throws JudgmentAPIError on failure and Error on timeout.
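*
* Illustrative sketch (not from the source); option values are examples only.
* @example
* // Poll every 2s for up to 150 attempts (~5 minutes), without the CLI progress bar.
* const results = await client.waitForEvaluation('my_project', 'run-001', {
*   intervalMs: 2000, maxAttempts: 150, showProgress: false,
* });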
*/
waitForEvaluation(projectName_1, evalRunName_1) {
return __awaiter(this, arguments, void 0, function* (projectName, evalRunName, options = {}) {
const { intervalMs = 5000, maxAttempts = 120, showProgress = true } = options; // Default: check every 5s for 10 mins
let progressBar;
if (showProgress) {
progressBar = new cli_progress_1.default.SingleBar({
format: `Waiting for ${ansi_colors_1.default.magenta(evalRunName)}... | ${ansi_colors_1.default.cyan('{bar}')} | {percentage}% || {status}`,
barCompleteChar: '\u2588',
barIncompleteChar: '\u2591',
hideCursor: true,
clearOnComplete: false,
stopOnComplete: true,
}, cli_progress_1.default.Presets.shades_classic);
progressBar.start(100, 0, { status: 'Initiating...' });
}
for (let attempt = 0; attempt < maxAttempts; attempt++) {
try {
const statusResult = yield this.checkEvalStatus(projectName, evalRunName);
const progress = Math.max(0, Math.min(100, statusResult.progress || 0)); // Clamp progress
const statusText = statusResult.message || statusResult.status;
if (progressBar) {
progressBar.update(progress, { status: statusText });
}
if (statusResult.status === 'completed') {
if (progressBar) {
progressBar.update(100, { status: ansi_colors_1.default.green('Completed! Fetching results...') });
}
// Fetch final results using pullEval
const finalResults = yield this.pullEvalResults(projectName, evalRunName);
logger_instance_js_1.default.info(`Evaluation run ${evalRunName} completed successfully.`);
return finalResults;
}
else if (statusResult.status === 'error' || statusResult.status === 'failed') {
// Concatenate error details into a single message string
const errorMsg = `Evaluation run ${evalRunName} failed or encountered an error: ${statusResult.error ? String(statusResult.error) : statusResult.message}`;
logger_instance_js_1.default.error(errorMsg);
if (progressBar)
progressBar.stop();
// Pass only the combined message to the constructor
throw new run_evaluation_js_1.JudgmentAPIError(errorMsg);
}
else if (statusResult.status === 'not_found') {
const errorMsg = `Evaluation run ${evalRunName} not found.`;
logger_instance_js_1.default.error(errorMsg);
if (progressBar)
progressBar.stop();
// Pass only the message to the constructor
throw new run_evaluation_js_1.JudgmentAPIError(errorMsg);
}
// Wait for the next interval
yield new Promise(resolve => setTimeout(resolve, intervalMs));
}
catch (error) {
// Handle errors during the wait loop (e.g., network issues during checkEvalStatus)
logger_instance_js_1.default.error(`Error during waitForEvaluation loop (attempt ${attempt + 1}/${maxAttempts}): ${error}`);
// Option: Rethrow immediately vs. retry vs. specific handling
if (error instanceof run_evaluation_js_1.JudgmentAPIError) { // If it was already a processed API error, rethrow
if (progressBar)
progressBar.stop();
throw error;
}
// For other errors, wait and retry until maxAttempts is exhausted
if (attempt === maxAttempts - 1) {
if (progressBar)
progressBar.stop();
throw new Error(`waitForEvaluation failed after ${maxAttempts} attempts: ${error}`);
}
// Still retryable, wait for interval
yield new Promise(resolve => setTimeout(resolve, intervalMs));
}
}
// If loop finishes without completion or error
if (progressBar)
progressBar.stop();
throw new Error(`Evaluation run ${evalRunName} did not complete after ${maxAttempts} attempts.`);
});
}
/**
* Create a simple ASCII progress bar
* @param percent The percentage to display (0-100)
* @returns A string representing the progress bar
*/
_createProgressBar(percent) {
const width = 20; // Width of the progress bar
const filled = Math.round(width * (percent / 100));
const empty = width - filled;
return `[${'#'.repeat(filled)}${'.'.repeat(empty)}] ${percent.toFixed(1)}%`;
}
// Keep helper methods private
getAuthHeaders() {
return {
'Content-Type': 'application/json',
'Authorization': `Bearer ${this.judgmentApiKey}`,
'X-Organization-Id': this.organizationId,
};
}
// Ensure this handles errors from Eval/Project API calls correctly
handleApiError(error, context) {
logger_instance_js_1.default.error(`API Error during ${context}:`);
if (axios_1.default.isAxiosError(error)) {
const axiosError = error;
const response = axiosError.response;
if (response) {
logger_instance_js_1.default.error(`Status: ${response.status} ${response.statusText}`);
logger_instance_js_1.default.debug('Response Data:', response.data);
if (response.status === 422) {
logger_instance_js_1.default.error('Validation Error Detail:', response.data);
}
else if (context === 'pullEval' && response.status === 404) { // Keep eval-specific handling
logger_instance_js_1.default.error(`Evaluation run not found.`);
}
else if (context.startsWith('delete') && response.status === 404) { // Keep generic delete handling
logger_instance_js_1.default.warn(`${context}: Resource not found, may have already been deleted.`);
}
}
else if (axiosError.request) {
logger_instance_js_1.default.error('No response received from server.');
}
else {
logger_instance_js_1.default.error(`Error setting up API request for ${context}`);
}
}
else {
logger_instance_js_1.default.error(`Unexpected error during ${context}`);
}
}
}
exports.JudgmentClient = JudgmentClient;
//# sourceMappingURL=judgment-client.js.map