judgeval
Judgment SDK for TypeScript/JavaScript
"use strict";
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
__setModuleDefault(result, mod);
return result;
};
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.JudgmentClient = void 0;
const dotenv = __importStar(require("dotenv"));
const axios_1 = __importDefault(require("axios"));
const example_js_1 = require("./data/example.js");
const result_js_1 = require("./data/result.js");
const base_scorer_js_1 = require("./scorers/base-scorer.js");
const evaluation_run_js_1 = require("./evaluation-run.js");
const rules_js_1 = require("./rules.js");
const run_evaluation_js_1 = require("./run-evaluation.js");
const constants_js_1 = require("./constants.js");
const logger_instance_js_1 = __importDefault(require("./common/logger-instance.js"));
// Progress bar dependencies (used by waitForEvaluation)
const cli_progress_1 = __importDefault(require("cli-progress"));
const ansi_colors_1 = __importDefault(require("ansi-colors"));
const eval_dataset_client_js_1 = require("./data/datasets/eval-dataset-client.js");
// Load environment variables
dotenv.config();
/**
* Singleton implementation for JudgmentClient
*/
class JudgmentClient {
/**
* Get the singleton instance of JudgmentClient
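*
* Illustrative sketch (not from the source): assumes JUDGMENT_API_KEY and
* JUDGMENT_ORG_ID are set in the environment.
* @example
* // The first call creates the instance; later calls return the same object.
* const client = JudgmentClient.getInstance();
* console.log(client === JudgmentClient.getInstance()); // true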
*/
static getInstance(judgmentApiKey, organizationId) {
if (!JudgmentClient.instance) {
JudgmentClient.instance = new JudgmentClient(judgmentApiKey, organizationId);
}
return JudgmentClient.instance;
}
/**
* Constructor for JudgmentClient
* @param judgmentApiKey The Judgment API key
* @param organizationId The organization ID
*/
constructor(judgmentApiKey, organizationId) {
this.judgmentApiKey = judgmentApiKey || process.env.JUDGMENT_API_KEY || '';
this.organizationId = organizationId || process.env.JUDGMENT_ORG_ID || '';
if (!this.judgmentApiKey) {
// Use logger for internal error, but throw for user
logger_instance_js_1.default.error('JUDGMENT_API_KEY is not set.');
throw new Error('Judgment API key is required. Set it in the constructor or as the JUDGMENT_API_KEY environment variable.');
}
if (!this.organizationId) {
throw new Error('Organization ID is required. Set it in the constructor or as the JUDGMENT_ORG_ID environment variable.');
}
// Keep this as direct output; log success only after both credentials are present
console.log('Successfully initialized JudgmentClient!');
}
/**
* Run an evaluation asynchronously
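*
* Illustrative sketch (not from the source): `examples` and `scorers` are
* assumed to be pre-built Example and scorer instances.
* @example
* await client.aRunEvaluation(examples, scorers,
*   'meta-llama/Meta-Llama-3-8B-Instruct-Turbo', undefined, {},
*   true, 'my_project', 'nightly-run');
* // Poll until the async run completes, then collect parsed results.
* const results = await client.waitForEvaluation('my_project', 'nightly-run');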
*/
aRunEvaluation(examples_1, scorers_1, model_1, aggregator_1, metadata_1) {
return __awaiter(this, arguments, void 0, function* (examples, scorers, model, aggregator, metadata, logResults = true, projectName = 'default_project', evalRunName = 'default_eval_run', override = false, useJudgment = true, ignoreErrors = true, rules) {
// Simply call runEvaluation with asyncExecution=true
return this.runEvaluation(examples, scorers, model, aggregator, metadata, logResults, projectName, evalRunName, override, useJudgment, ignoreErrors, true, // Set asyncExecution to true
rules);
});
}
/**
* Run an evaluation
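*
* Illustrative sketch (not from the source); prefer `evaluate()` for the
* named-options form. `examples` and `scorers` are assumed pre-built.
* @example
* const results = await client.runEvaluation(
*   examples, scorers,
*   'meta-llama/Meta-Llama-3-8B-Instruct-Turbo', // model
*   undefined, {},                               // aggregator, metadata
*   true, 'my_project', 'run-001');              // logResults, projectName, evalRunName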
*/
runEvaluation(examples_1, scorers_1, model_1, aggregator_1, metadata_1) {
return __awaiter(this, arguments, void 0, function* (examples, scorers, model, aggregator, metadata, logResults = true, projectName = 'default_project', evalRunName = 'default_eval_run', override = false, useJudgment = true, ignoreErrors = true, asyncExecution = false, rules) {
try {
// Load appropriate implementations for all scorers
const loadedScorers = [];
for (const scorer of scorers) {
try {
if (scorer instanceof base_scorer_js_1.ScorerWrapper) {
loadedScorers.push(scorer.loadImplementation(useJudgment));
}
else {
// Assume scorers passed are already JudgevalScorer or APIJudgmentScorer
loadedScorers.push(scorer);
}
}
catch (error) {
throw new Error(`Failed to load implementation for scorer ${scorer.constructor.name}: ${error instanceof Error ? error.message : String(error)}`);
}
}
// Prevent using JudgevalScorer with rules - only APIJudgmentScorer allowed with rules
if (rules && loadedScorers.some(scorer => scorer instanceof base_scorer_js_1.JudgevalScorer)) {
throw new Error('Cannot use JudgevalScorer instances when rules are provided; rules support only API scorers. Remove the rules or use only APIJudgmentScorer types.');
}
// Convert ScorerWrapper in rules to their implementations
let loadedRules;
if (rules) {
loadedRules = [];
for (const rule of rules) {
try {
const processedConditions = [];
for (const condition of rule.conditions) {
// Convert metric if it's a ScorerWrapper
if (condition.metric instanceof base_scorer_js_1.ScorerWrapper) {
try {
// Create a new Condition object with the loaded implementation
const loadedMetric = condition.metric.loadImplementation(useJudgment);
const newCondition = new rules_js_1.Condition(loadedMetric);
// Copy the remaining condition properties (e.g., threshold), overriding metric with the loaded implementation
Object.assign(newCondition, Object.assign(Object.assign({}, condition), { metric: loadedMetric }));
processedConditions.push(newCondition);
}
catch (error) {
throw new Error(`Failed to convert ScorerWrapper to implementation in rule '${rule.name}', condition metric '${condition.metric.constructor.name}': ${error instanceof Error ? error.message : String(error)}`);
}
}
else {
processedConditions.push(condition);
}
}
// Create new rule with processed conditions
const newRule = new rules_js_1.Rule(rule.name, processedConditions, rule.combine_type, rule.description, rule.notification, rule.ruleId);
loadedRules.push(newRule);
}
catch (error) {
throw new Error(`Failed to process rule '${rule.name}': ${error instanceof Error ? error.message : String(error)}`);
}
}
}
const evaluationRun = new evaluation_run_js_1.EvaluationRun({
logResults,
projectName,
evalName: evalRunName,
examples,
scorers: loadedScorers,
model,
aggregator,
metadata,
judgmentApiKey: this.judgmentApiKey,
rules: loadedRules,
organizationId: this.organizationId
});
return (0, run_evaluation_js_1.runEval)(evaluationRun, override, ignoreErrors, asyncExecution);
}
catch (error) {
if (error instanceof Error) {
if (error.message.includes('one or more fields are invalid')) {
throw new Error(`Please check your EvaluationRun object, one or more fields are invalid: \n${error.message}`);
}
else {
throw new Error(`An unexpected error occurred during evaluation: ${error.message}`);
}
}
else {
throw new Error(`An unexpected error occurred during evaluation: ${String(error)}`);
}
}
});
}
/**
* Run an evaluation with a simplified interface (recommended)
* @param config Configuration object for the evaluation
* @returns Promise<ScoringResult[]> The evaluation results
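*
* Illustrative sketch (not from the source): uses the Example class from
* ./data/example.js; `myScorer` stands in for any loaded scorer instance.
* @example
* const results = await client.evaluate({
*   examples: [new Example({ input: 'What is 2 + 2?', actualOutput: '4' })],
*   scorers: [myScorer],
*   projectName: 'my_project',
*   evalName: 'smoke-test',
* });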
*/
evaluate(config) {
return __awaiter(this, void 0, void 0, function* () {
// Set default values
const { examples, scorers, model = 'meta-llama/Meta-Llama-3-8B-Instruct-Turbo', aggregator = undefined, metadata = {}, projectName = 'default_project', evalName = `eval-run-${Date.now()}`, logResults = true, useJudgment = true, ignoreErrors = true, asyncExecution = false, rules = undefined, override = false } = config;
// Call the original runEvaluation method with the extracted parameters
return this.runEvaluation(examples, scorers, model, aggregator, metadata, logResults, projectName, evalName, override, useJudgment, ignoreErrors, asyncExecution, rules);
});
}
/**
* Evaluate a dataset
*/
evaluateDataset(dataset_1, scorers_1, model_1, aggregator_1, metadata_1) {
return __awaiter(this, arguments, void 0, function* (dataset, // Keep types loose for this stub
scorers, model, aggregator, metadata, projectName = 'default_project', evalRunName = 'default_eval_run', logResults = true, useJudgment = true, rules) {
throw new Error('Not implemented in JudgmentClient. Use EvalDatasetClient.');
});
}
/**
* Pull evaluation results from the server
*
* @param projectName Name of the project
* @param evalRunName Name of the evaluation run
* @returns Array of evaluation result objects with the same format as the Python SDK
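*
* Illustrative sketch (not from the source).
* @example
* const raw = await client.pullEval('my_project', 'run-001');
* // `raw` mirrors the server payload; use pullEvalResults() for parsed ScoringResult objects.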
*/
pullEval(projectName, evalRunName) {
return __awaiter(this, void 0, void 0, function* () {
const evalRunRequestBody = {
project_name: projectName,
eval_name: evalRunName,
judgment_api_key: this.judgmentApiKey
};
try {
logger_instance_js_1.default.info(`Pulling evaluation results for project '${projectName}', run '${evalRunName}'`);
const response = yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_FETCH_API_URL, evalRunRequestBody, {
headers: {
"Content-Type": "application/json",
"Authorization": `Bearer ${this.judgmentApiKey}`,
"X-Organization-Id": this.organizationId
}
});
// Ensure we return the data in the exact same format as the Python SDK
return response.data;
}
catch (error) {
if (axios_1.default.isAxiosError(error) && error.response) {
const errorMessage = `Error fetching eval results: ${JSON.stringify(error.response.data)}`;
logger_instance_js_1.default.error(errorMessage);
throw new Error(errorMessage);
}
else {
const errorMessage = `Unknown error during pullEval: ${error}`;
logger_instance_js_1.default.error(errorMessage);
throw error;
}
}
});
}
/**
* Retrieves evaluation results with retry mechanism
* @param projectName Name of the project
* @param evalRunName Name of the evaluation run
* @param options Configuration options for retries
* @returns The evaluation results; throws if all retry attempts fail
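*
* Illustrative sketch (not from the source); option values are examples only.
* @example
* // Up to 5 attempts; waits 2s, 4s, 8s (capped at 10s) between retries.
* const results = await client.pullEvalWithRetry('my_project', 'run-001', {
*   maxRetries: 5, initialDelayMs: 1000, maxDelayMs: 10000, backoffFactor: 2,
* });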
*/
pullEvalWithRetry(projectName_1, evalRunName_1) {
return __awaiter(this, arguments, void 0, function* (projectName, evalRunName, options = {}) {
// Default options
const maxRetries = options.maxRetries || 3;
const initialDelayMs = options.initialDelayMs || 2000;
const maxDelayMs = options.maxDelayMs || 30000;
const backoffFactor = options.backoffFactor || 2;
let lastError = null;
for (let attempt = 0; attempt < maxRetries; attempt++) {
try {
// Calculate delay with exponential backoff, capped at maxDelayMs
const delayMs = Math.min(initialDelayMs * Math.pow(backoffFactor, attempt), maxDelayMs);
if (attempt > 0) {
logger_instance_js_1.default.info(`Retry attempt ${attempt + 1}/${maxRetries} for pullEval after ${delayMs}ms delay`);
yield new Promise(resolve => setTimeout(resolve, delayMs));
}
const results = yield this.pullEval(projectName, evalRunName);
return results;
}
catch (error) {
lastError = error;
// Check if we should retry based on error type
if (axios_1.default.isAxiosError(error) && error.response) {
const status = error.response.status;
// Don't retry for client errors (except 429 Too Many Requests)
if (status >= 400 && status < 500 && status !== 429) {
logger_instance_js_1.default.error(`Not retrying due to client error: ${status}`);
throw error;
}
}
logger_instance_js_1.default.warn(`Attempt ${attempt + 1} failed, ${attempt < maxRetries - 1 ? 'will retry' : 'giving up'}`);
}
}
// If we get here, all retries failed
logger_instance_js_1.default.error(`All ${maxRetries} retry attempts failed for pullEval`);
throw lastError || new Error('Failed to retrieve evaluation results after all retry attempts');
});
}
/**
* Export evaluation results to a file format
* @param projectName Name of the project
* @param evalRunName Name of the evaluation run
* @param format Export format ('json' or 'csv')
* @returns The exported data as a string
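*
* Illustrative sketch (not from the source): assumes Node's fs/promises for
* writing the exported string to disk.
* @example
* const { writeFile } = require('fs/promises');
* const csv = await client.exportEvalResults('my_project', 'run-001', 'csv');
* await writeFile('results.csv', csv);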
*/
exportEvalResults(projectName_1, evalRunName_1) {
return __awaiter(this, arguments, void 0, function* (projectName, evalRunName, format = 'json') {
logger_instance_js_1.default.info(`Exporting eval results for project '${projectName}', run '${evalRunName}' as ${format}`);
try {
const resultsData = yield this.pullEval(projectName, evalRunName);
if (!resultsData || resultsData.length === 0 || !resultsData[0].results) {
logger_instance_js_1.default.warn('No results found to export.');
return '';
}
const results = resultsData[0].results;
if (format === 'json') {
// Pretty print JSON
return JSON.stringify(results.map(r => r.toJSON()), null, 2);
}
else if (format === 'csv') {
if (results.length === 0)
return ''; // No data to export
// Dynamically determine headers from the first result object
// Flatten the structure for CSV
const flatResults = results.map(result => {
var _a, _b, _c;
const flat = {};
const exampleData = (_b = (_a = result.dataObject) === null || _a === void 0 ? void 0 : _a.toJSON()) !== null && _b !== void 0 ? _b : {}; // Use toJSON which gives snake_case
const scorersData = (_c = result.scorersData) !== null && _c !== void 0 ? _c : [];
// Add example data fields (snake_case)
for (const key in exampleData) {
// Prefix example fields to avoid collision, e.g., example_input
flat[`example_${key}`] = exampleData[key];
}
// Add scorers data
scorersData.forEach(scorer => {
flat[`scorer_${scorer.name}_score`] = scorer.score;
flat[`scorer_${scorer.name}_additional_metadata`] = JSON.stringify(scorer.additional_metadata);
flat[`scorer_${scorer.name}_error`] = scorer.error;
});
// Add top-level error if present
flat['top_level_error'] = result.error;
return flat;
});
// Get all unique keys from the flattened results for headers
const headers = Array.from(new Set(flatResults.flatMap(Object.keys)));
// Use papaparse for robust CSV generation
const Papa = require('papaparse'); // Lazily require papaparse so it is only loaded when CSV export is requested
const csv = Papa.unparse({
fields: headers,
data: flatResults
}, {
header: true,
quotes: true, // Ensure fields with commas/newlines are quoted
quoteChar: '"',
escapeChar: '"',
delimiter: ','
});
return csv;
}
else {
throw new Error(`Unsupported export format: ${format}`);
}
}
catch (error) {
logger_instance_js_1.default.error(`Error exporting eval results: ${error}`);
this.handleApiError(error, 'exportEvalResults');
throw error;
}
});
}
/**
* Delete an evaluation from the server
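*
* Illustrative sketch (not from the source); resolves to false on failure.
* @example
* const ok = await client.deleteEval('my_project', ['run-001', 'run-002']);
* if (!ok) console.error('Delete failed; see logs for details.');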
*/
deleteEval(projectName, evalRunNames) {
return __awaiter(this, void 0, void 0, function* () {
logger_instance_js_1.default.info(`Deleting eval runs: ${evalRunNames.join(', ')} from project: ${projectName}`);
const requestBody = {
project_name: projectName,
eval_names: evalRunNames,
judgment_api_key: this.judgmentApiKey,
};
try {
yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_DELETE_API_URL, requestBody, {
headers: this.getAuthHeaders()
});
logger_instance_js_1.default.info('Successfully deleted eval runs.');
return true;
}
catch (error) {
logger_instance_js_1.default.error(`Error deleting eval runs: ${error}`);
this.handleApiError(error, 'deleteEval');
return false;
}
});
}
/**
* Delete all evaluations from the server for a given project
*/
deleteProjectEvals(projectName) {
return __awaiter(this, void 0, void 0, function* () {
logger_instance_js_1.default.info(`Deleting ALL eval runs for project: ${projectName}`);
const requestBody = {
project_name: projectName,
judgment_api_key: this.judgmentApiKey,
};
try {
yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_DELETE_PROJECT_API_URL, requestBody, {
headers: this.getAuthHeaders()
});
logger_instance_js_1.default.info(`Successfully deleted all eval runs for project ${projectName}.`);
return true;
}
catch (error) {
logger_instance_js_1.default.error(`Error deleting project evals: ${error}`);
this.handleApiError(error, 'deleteProjectEvals');
return false;
}
});
}
/**
* Create a project on the server
*/
createProject(projectName) {
return __awaiter(this, void 0, void 0, function* () {
logger_instance_js_1.default.info(`Creating project: ${projectName}`);
const requestBody = {
project_name: projectName,
judgment_api_key: this.judgmentApiKey,
};
try {
logger_instance_js_1.default.info(`Creating project: ${projectName}`);
const response = yield axios_1.default.post(constants_js_1.JUDGMENT_PROJECT_CREATE_API_URL, requestBody, {
headers: this.getAuthHeaders()
});
// Check for specific success message or status if API provides one
if (response.data && response.data.message === 'Project added successfully') {
logger_instance_js_1.default.info(`Successfully created project: ${projectName}`);
return true;
}
else if (response.data && response.data.message === 'Project already exists') {
logger_instance_js_1.default.warn(`Project '${projectName}' already exists.`);
return true; // Or false, depending on desired behavior for existing projects
}
else {
logger_instance_js_1.default.error(`Failed to create project '${projectName}'. Response: ${JSON.stringify(response.data)}`);
return false;
}
}
catch (error) {
logger_instance_js_1.default.error(`Error creating project: ${error}`);
this.handleApiError(error, 'createProject');
return false;
}
});
}
/**
* Delete a project from the server
*/
deleteProject(projectName) {
return __awaiter(this, void 0, void 0, function* () {
logger_instance_js_1.default.info(`Deleting project: ${projectName}`);
const requestBody = {
project_name: projectName,
judgment_api_key: this.judgmentApiKey,
};
try {
const response = yield axios_1.default.post(constants_js_1.JUDGMENT_PROJECT_DELETE_API_URL, requestBody, {
headers: this.getAuthHeaders()
});
if (response.data && response.data.message === 'Project deleted successfully') {
logger_instance_js_1.default.info(`Successfully deleted project: ${projectName}`);
return true;
}
else {
logger_instance_js_1.default.error(`Failed to delete project '${projectName}'. Response: ${JSON.stringify(response.data)}`);
return false;
}
}
catch (error) {
logger_instance_js_1.default.error(`Error deleting project: ${error}`);
this.handleApiError(error, 'deleteProject');
return false;
}
});
}
/**
* Validate that the user API key is valid
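*
* Resolves to a [valid, message] tuple. Illustrative sketch (not from the source).
* @example
* const [valid, message] = await client.validateApiKey();
* if (!valid) throw new Error(message);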
*/
validateApiKey() {
return __awaiter(this, void 0, void 0, function* () {
var _a, _b, _c, _d;
logger_instance_js_1.default.debug('Validating API Key...');
try {
// Instantiate EvalDatasetClient to perform the validation call
const datasetClient = new eval_dataset_client_js_1.EvalDatasetClient(this.judgmentApiKey, this.organizationId);
// Use the dataset client to make the call
yield datasetClient.pullProjectDatasetStats('__api_key_validation__');
logger_instance_js_1.default.debug('API Key appears valid.');
return [true, 'API Key is valid.'];
}
catch (error) {
let message = 'API Key validation failed.';
if (axios_1.default.isAxiosError(error)) {
if (((_a = error.response) === null || _a === void 0 ? void 0 : _a.status) === 401 || ((_b = error.response) === null || _b === void 0 ? void 0 : _b.status) === 403) {
message = 'API Key is invalid or expired.';
}
else if (((_c = error.response) === null || _c === void 0 ? void 0 : _c.status) === 404) {
// If validation endpoint returns 404, key might be valid but endpoint wrong/project not found
// This depends on the specific validation endpoint behavior
message = 'API Key might be valid, but validation endpoint returned 404.';
}
else {
message = `API Key validation failed with status ${(_d = error.response) === null || _d === void 0 ? void 0 : _d.status}: ${error.message}`;
}
}
else {
message = `API Key validation failed: ${String(error)}`;
}
logger_instance_js_1.default.error(message);
return [false, message];
}
});
}
/**
* Assert a test by running the evaluation and checking the results for success
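*
* Illustrative sketch (not from the source): `examples` and `scorers` are
* assumed pre-built; the call is expected to throw when any scorer fails.
* @example
* await client.assertTest(examples, scorers,
*   'meta-llama/Meta-Llama-3-8B-Instruct-Turbo', undefined, {},
*   true, 'my_project', 'ci-gate');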
*/
assertTest(examples_1, scorers_1, model_1, aggregator_1, metadata_1) {
return __awaiter(this, arguments, void 0, function* (examples, scorers, // Type matches Python's intent
model, aggregator, metadata, logResults = true, projectName = 'default_project', evalRunName = 'default_eval_run', override = false, rules) {
const results = yield this.runEvaluation(examples, scorers, model, aggregator, metadata, logResults, projectName, evalRunName, override, true, // useJudgment = true (necessary if API scorers or rules are involved)
false, // ignoreErrors = false for assert
false, // asyncExecution = false
rules);
(0, run_evaluation_js_1.assertTest)(results); // Assumes assertTest handles ScoringResult[]
});
}
/**
* Pull the results of an evaluation run. Matches `pullEval` logic but returns only the ScoringResult array.
* @param projectName The name of the project
* @param evalRunName The name of the evaluation run
* @returns Array of ScoringResult objects
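*
* Illustrative sketch (not from the source).
* @example
* const results = await client.pullEvalResults('my_project', 'run-001');
* for (const r of results) console.log(r.dataObject.input, r.scorersData);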
*/
pullEvalResults(projectName, evalRunName) {
return __awaiter(this, void 0, void 0, function* () {
var _a;
// Get the raw API response
const rawResults = yield this.pullEval(projectName, evalRunName);
// Ensure proper handling of empty results
if (!rawResults || !Array.isArray(rawResults) || rawResults.length === 0) {
return [];
}
// Process the results to match Python SDK format
const scoringResults = [];
for (const item of rawResults) {
if (item.result && item.result.scorers_data && Array.isArray(item.result.scorers_data)) {
// Extract example data if available
const exampleData = item.examples && item.examples.length > 0 ? item.examples[0] : null;
// Create an Example object with the data from the API
const example = new example_js_1.Example({
input: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.input) || '',
actualOutput: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.actual_output) || '',
expectedOutput: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.expected_output) || '',
context: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.context) || null,
retrievalContext: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.retrieval_context) || null,
additionalMetadata: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.additional_metadata) || {},
toolsCalled: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.tools_called) || null,
expectedTools: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.expected_tools) || null,
exampleId: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.example_id) || null,
name: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.name) || 'example',
exampleIndex: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.example_index) || 0,
timestamp: (exampleData === null || exampleData === void 0 ? void 0 : exampleData.created_at) || new Date().toISOString(),
traceId: ((_a = item.result) === null || _a === void 0 ? void 0 : _a.trace_id) || null
});
// Create a ScoringResult using the builder pattern
const scoringResult = result_js_1.ScoringResult.builder()
.dataObject(example)
.scorersData(item.result.scorers_data)
.build();
scoringResults.push(scoringResult);
}
}
return scoringResults;
});
}
/**
* Check the status of an evaluation run using the fetch endpoint.
* This is a heuristic approach as the endpoint might return full results or status info.
* @param projectName The name of the project
* @param evalRunName The name of the evaluation run
* @returns An object representing the status { status: string, progress: number, message: string }
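*
* Illustrative sketch (not from the source).
* @example
* const { status, progress, message } = await client.checkEvalStatus('my_project', 'run-001');
* console.log(`${status} (${progress}%): ${message}`);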
*/
checkEvalStatus(projectName, evalRunName) {
return __awaiter(this, void 0, void 0, function* () {
var _a;
const requestBody = {
project_name: projectName,
eval_name: evalRunName,
judgment_api_key: this.judgmentApiKey,
};
try {
const response = yield axios_1.default.post(constants_js_1.JUDGMENT_EVAL_FETCH_API_URL, requestBody, {
headers: this.getAuthHeaders(),
// Add a shorter timeout for status checks?
// timeout: 5000
});
const data = response.data;
// Check if the response looks like a status object
if (data && typeof data.status === 'string') {
return {
status: data.status || 'unknown',
progress: typeof data.progress === 'number' ? data.progress : 0,
message: data.message || '',
error: data.error
};
}
// Check if the response looks like completed results (array format from pullEval)
else if (Array.isArray(data) && data.length > 0 && data[0].results) {
return {
status: 'completed',
progress: 100,
message: 'Evaluation completed.'
};
}
// Check if response looks like completed results (single object format)
else if (data && typeof data.id === 'string' && Array.isArray(data.results)) { // Adjust based on actual API response for single result fetch
return {
status: 'completed',
progress: 100,
message: 'Evaluation completed.'
};
}
// Handle other potential responses or assume pending/unknown
else {
logger_instance_js_1.default.warn(`Unexpected response format when checking status for ${evalRunName}:`, data);
return {
status: 'unknown',
progress: 0,
message: 'Could not determine status from API response.'
};
}
}
catch (error) {
// Don't throw here, return status indicating error
let errorMessage = 'Failed to fetch evaluation status.';
let status = 'error';
if (axios_1.default.isAxiosError(error) && ((_a = error.response) === null || _a === void 0 ? void 0 : _a.status) === 404) {
status = 'not_found';
errorMessage = 'Evaluation run not found.';
logger_instance_js_1.default.warn(`Evaluation run ${evalRunName} not found.`);
}
else {
this.handleApiError(error, 'checkEvalStatus');
errorMessage = `Error fetching status: ${String(error)}`;
}
return {
status: status,
progress: 0,
message: errorMessage,
error: String(error) // Include error string
};
}
});
}
/**
* Wait for an async evaluation to complete and return the results
* @param projectName The name of the project
* @param evalRunName The name of the evaluation run
* @param options Optional configuration for polling: intervalMs, maxAttempts, showProgress
* @returns The evaluation results as ScoringResult[]; throws JudgmentAPIError on failure and Error on timeout.
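*
* Illustrative sketch (not from the source); option values are examples only.
* @example
* // Poll every 2s for up to 150 attempts (~5 minutes), without the CLI progress bar.
* const results = await client.waitForEvaluation('my_project', 'run-001', {
*   intervalMs: 2000, maxAttempts: 150, showProgress: false,
* });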
*/
waitForEvaluation(projectName_1, evalRunName_1) {
return __awaiter(this, arguments, void 0, function* (projectName, evalRunName, options = {}) {
const { intervalMs = 5000, maxAttempts = 120, showProgress = true } = options; // Default: check every 5s for 10 mins
let progressBar;
if (showProgress) {
progressBar = new cli_progress_1.default.SingleBar({
format: `Waiting for ${ansi_colors_1.default.magenta(evalRunName)}... | ${ansi_colors_1.default.cyan('{bar}')} | {percentage}% || {status}`,
barCompleteChar: '\u2588',
barIncompleteChar: '\u2591',
hideCursor: true,
clearOnComplete: false,
stopOnComplete: true,
}, cli_progress_1.default.Presets.shades_classic);
progressBar.start(100, 0, { status: 'Initiating...' });
}
for (let attempt = 0; attempt < maxAttempts; attempt++) {
try {
const statusResult = yield this.checkEvalStatus(projectName, evalRunName);
const progress = Math.max(0, Math.min(100, statusResult.progress || 0)); // Clamp progress
const statusText = statusResult.message || statusResult.status;
if (progressBar) {
progressBar.update(progress, { status: statusText });
}
if (statusResult.status === 'completed') {
if (progressBar) {
progressBar.update(100, { status: ansi_colors_1.default.green('Completed! Fetching results...') });
}
// Fetch final results using pullEval
const finalResults = yield this.pullEvalResults(projectName, evalRunName);
logger_instance_js_1.default.info(`Evaluation run ${evalRunName} completed successfully.`);
return finalResults;
}
else if (statusResult.status === 'error' || statusResult.status === 'failed') {
// Concatenate error details into a single message string
const errorMsg = `Evaluation run ${evalRunName} failed or encountered an error: ${statusResult.error ? String(statusResult.error) : statusResult.message}`;
logger_instance_js_1.default.error(errorMsg);
if (progressBar)
progressBar.stop();
// Pass only the combined message to the constructor
throw new run_evaluation_js_1.JudgmentAPIError(errorMsg);
}
else if (statusResult.status === 'not_found') {
const errorMsg = `Evaluation run ${evalRunName} not found.`;
logger_instance_js_1.default.error(errorMsg);
if (progressBar)
progressBar.stop();
// Pass only the message to the constructor
throw new run_evaluation_js_1.JudgmentAPIError(errorMsg);
}
// Wait for the next interval
yield new Promise(resolve => setTimeout(resolve, intervalMs));
}
catch (error) {
// Handle errors during the wait loop (e.g., network issues during checkEvalStatus)
logger_instance_js_1.default.error(`Error during waitForEvaluation loop (attempt ${attempt + 1}/${maxAttempts}): ${error}`);
// Option: Rethrow immediately vs. retry vs. specific handling
if (error instanceof run_evaluation_js_1.JudgmentAPIError) { // If it was already a processed API error, rethrow
if (progressBar)
progressBar.stop();
throw error;
}
// For other errors, wait and retry until maxAttempts is exhausted
if (attempt === maxAttempts - 1) {
if (progressBar)
progressBar.stop();
throw new Error(`waitForEvaluation failed after ${maxAttempts} attempts: ${error}`);
}
// Still retryable, wait for interval
yield new Promise(resolve => setTimeout(resolve, intervalMs));
}
}
// If loop finishes without completion or error
if (progressBar)
progressBar.stop();
throw new Error(`Evaluation run ${evalRunName} did not complete after ${maxAttempts} attempts.`);
});
}
/**
* Create a simple ASCII progress bar
* @param percent The percentage to display (0-100)
* @returns A string representing the progress bar
*/
_createProgressBar(percent) {
const width = 20; // Width of the progress bar
const filled = Math.round(width * (percent / 100));
const empty = width - filled;
return `[${'#'.repeat(filled)}${'.'.repeat(empty)}] ${percent.toFixed(1)}%`;
}
// Keep helper methods private
getAuthHeaders() {
return {
'Content-Type': 'application/json',
'Authorization': `Bearer ${this.judgmentApiKey}`,
'X-Organization-Id': this.organizationId,
};
}
// Ensure this handles errors from Eval/Project API calls correctly
handleApiError(error, context) {
logger_instance_js_1.default.error(`API Error during ${context}:`);
if (axios_1.default.isAxiosError(error)) {
const axiosError = error;
const response = axiosError.response;
if (response) {
logger_instance_js_1.default.error(`Status: ${response.status} ${response.statusText}`);
logger_instance_js_1.default.debug('Response Data:', response.data);
if (response.status === 422) {
logger_instance_js_1.default.error('Validation Error Detail:', response.data);
}
else if (context === 'pullEval' && response.status === 404) { // Keep eval-specific handling
logger_instance_js_1.default.error(`Evaluation run not found.`);
}
else if (context.startsWith('delete') && response.status === 404) { // Keep generic delete handling
logger_instance_js_1.default.warn(`${context}: Resource not found, may have already been deleted.`);
}
}
else if (axiosError.request) {
logger_instance_js_1.default.error('No response received from server.');
}
else {
logger_instance_js_1.default.error(`Error setting up API request for ${context}`);
}
}
else {
logger_instance_js_1.default.error(`Unexpected error during ${context}`);
}
}
}
exports.JudgmentClient = JudgmentClient;
//# sourceMappingURL=judgment-client.js.map