judgeval
Judgment SDK for TypeScript/JavaScript
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
import axios from 'axios';
import { Example } from './data/example.js';
import { ScoringResult } from './data/result.js';
import { APIJudgmentScorer } from './scorers/base-scorer.js';
import { ROOT_API, JUDGMENT_EVAL_API_URL, JUDGMENT_EVAL_LOG_API_URL, JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL, JUDGMENT_EVAL_FETCH_API_URL } from './constants.js';
import { log as loggerLog, info as loggerInfo, warn as loggerWarn, error as loggerError } from './common/logger.js';
/**
* Custom error for Judgment API errors
*/
export class JudgmentAPIError extends Error {
constructor(message) {
super(message);
this.name = 'JudgmentAPIError';
}
}
/**
* Validates an API response to ensure it has the expected format
* Throws a JudgmentAPIError if the response is invalid
*/
export function validateApiResponse(response) {
if (!response || typeof response !== 'object') {
throw new JudgmentAPIError('Invalid API response format: response is not an object');
}
if (response.error) {
throw new JudgmentAPIError(`API error: ${response.error}`);
}
}
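// Usage sketch (hedged): validateApiResponse can guard any parsed Judgment API response body
// before it is consumed; the endpoint below is purely illustrative.
//
//   const resp = await axios.get(`${ROOT_API}/some-endpoint/`); // hypothetical endpoint
//   validateApiResponse(resp.data); // throws JudgmentAPIError on a non-object or { error: ... } body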
/**
* Sends an evaluation run to the RabbitMQ evaluation queue
*/
export function sendToRabbitMQ(evaluationRun) {
return __awaiter(this, void 0, void 0, function* () {
const payload = evaluationRun.toJSON();
try {
const response = yield axios.post(JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL, payload, {
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${evaluationRun.judgmentApiKey}`,
'X-Organization-Id': evaluationRun.organizationId
}
});
return response.data;
}
catch (error) {
if (axios.isAxiosError(error) && error.response) {
throw new JudgmentAPIError(`Error sending to RabbitMQ: ${error.response.data.detail || error.message}`);
}
else {
throw new JudgmentAPIError(`Error sending to RabbitMQ: ${(error === null || error === void 0 ? void 0 : error.message) || String(error)}`);
}
}
});
}
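// Usage sketch (assumes an evaluation-run object exposing toJSON(), judgmentApiKey, and
// organizationId; the variable name is illustrative):
//
//   const queueAck = await sendToRabbitMQ(evaluationRun);
//   loggerInfo(`Queued evaluation: ${JSON.stringify(queueAck)}`);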
/**
* Checks the status of an async evaluation
* @param evaluationRun The evaluation run to check
* @returns The status of the evaluation
*/
export function checkEvaluationStatus(evaluationRun) {
return __awaiter(this, void 0, void 0, function* () {
try {
const response = yield axios.post(`${ROOT_API}/check-eval-status/`, {
eval_name: evaluationRun.evalName,
project_name: evaluationRun.projectName,
judgment_api_key: evaluationRun.judgmentApiKey,
}, {
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${evaluationRun.judgmentApiKey}`,
'X-Organization-Id': evaluationRun.organizationId
}
});
return response.data;
}
catch (error) {
if (axios.isAxiosError(error) && error.response) {
throw new JudgmentAPIError(`Error checking evaluation status: ${error.response.data.detail || error.message}`);
}
else {
throw new JudgmentAPIError(`Error checking evaluation status: ${(error === null || error === void 0 ? void 0 : error.message) || String(error)}`);
}
}
});
}
/**
* Polls the status of an async evaluation until it's complete
* @param evaluationRun The evaluation run to poll
* @param intervalMs The interval between polls in milliseconds
* @param maxAttempts The maximum number of polling attempts
* @param onProgress Optional callback for progress updates
* @returns The evaluation results
*/
export function pollEvaluationStatus(evaluationRun_1) {
return __awaiter(this, arguments, void 0, function* (evaluationRun, intervalMs = 2000, maxAttempts = 300, onProgress) {
let attempts = 0;
while (attempts < maxAttempts) {
try {
const status = yield checkEvaluationStatus(evaluationRun);
// Call progress callback if provided
if (onProgress) {
onProgress(status);
}
// Check if evaluation is complete
if (status.status === 'complete') {
loggerLog('Async evaluation complete, fetching results');
// Fetch the results
const response = yield axios.post(JUDGMENT_EVAL_FETCH_API_URL, {
eval_name: evaluationRun.evalName,
project_name: evaluationRun.projectName,
judgment_api_key: evaluationRun.judgmentApiKey,
}, {
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${evaluationRun.judgmentApiKey}`,
'X-Organization-Id': evaluationRun.organizationId
}
});
console.log('Raw API evaluation results:', JSON.stringify(response.data, null, 2));
// Convert API results to ScoringResult objects
const results = response.data.map((result) => {
return new ScoringResult({
dataObject: result.data_object,
scorersData: result.scorers_data || [],
error: result.error
});
});
return results;
}
else if (status.status === 'failed') {
throw new JudgmentAPIError(`Async evaluation failed: ${status.error || 'Unknown error'}`);
}
// Log progress
loggerLog(`Evaluation status: ${status.status}, progress: ${status.progress || 'unknown'}`);
// Wait before next poll
yield new Promise(resolve => setTimeout(resolve, intervalMs));
attempts++;
}
catch (error) {
loggerError(`Error polling evaluation status: ${(error === null || error === void 0 ? void 0 : error.message) || String(error)}`);
throw new JudgmentAPIError(`Error polling evaluation status: ${(error === null || error === void 0 ? void 0 : error.message) || String(error)}`);
}
}
throw new JudgmentAPIError(`Evaluation polling timed out after ${maxAttempts} attempts`);
});
}
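// Usage sketch: poll every 2 seconds for up to 300 attempts (the defaults) with a progress
// callback. `evaluationRun` is assumed to carry evalName, projectName, judgmentApiKey,
// and organizationId.
//
//   const results = await pollEvaluationStatus(evaluationRun, 2000, 300, (status) => {
//     loggerLog(`Evaluation ${status.status} (${status.progress || 'unknown'})`);
//   });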
/**
* Executes an evaluation of a list of Examples using one or more JudgmentScorers via the Judgment API
* @param evaluationRun The evaluation run object containing the examples, scorers, and metadata
* @returns The results of the evaluation
*/
export function executeApiEval(evaluationRun) {
return __awaiter(this, void 0, void 0, function* () {
var _a;
try {
// Submit API request to execute evals
const payload = evaluationRun.toJSON();
// Ensure all examples have valid UUIDs and required fields for each scorer type
if (payload.examples && Array.isArray(payload.examples)) {
payload.examples.forEach(example => {
// Ensure example_id is a valid UUID (matching Python's uuid4 format)
if (!example.example_id || !example.example_id.match(/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i)) {
// Generate a UUID v4 format ID
example.example_id = 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function (c) {
const r = Math.random() * 16 | 0;
const v = c === 'x' ? r : (r & 0x3 | 0x8);
return v.toString(16);
});
}
// Ensure required fields for all scorers (matching Python SDK)
example.name = example.name || "example";
example.example_index = example.example_index || 0;
example.timestamp = example.timestamp || new Date().toISOString();
example.trace_id = example.trace_id || `trace-${Date.now()}-${Math.floor(Math.random() * 1000)}`;
// Check for required fields for each scorer type
if (payload.scorers && Array.isArray(payload.scorers)) {
payload.scorers.forEach((scorer) => {
// Find the corresponding scorer in the evaluationRun
const scorerObj = evaluationRun.scorers.find(s => s.type === scorer.score_type);
if (scorerObj && scorerObj.requiredFields) {
// Check if the example has all the required fields for this scorer
scorerObj.requiredFields.forEach((field) => {
const snakeCaseField = field.replace(/([A-Z])/g, '_$1').toLowerCase(); // Convert camelCase to snake_case
// If the field is missing, add it with an appropriate default value
if (example[snakeCaseField] === undefined) {
if (snakeCaseField === 'context' || snakeCaseField === 'retrieval_context') {
example[snakeCaseField] = [];
}
else if (snakeCaseField === 'input' || snakeCaseField === 'actual_output' || snakeCaseField === 'expected_output') {
example[snakeCaseField] = example[snakeCaseField] || '';
}
}
else if ((snakeCaseField === 'context' || snakeCaseField === 'retrieval_context') && !Array.isArray(example[snakeCaseField])) {
// Ensure context and retrieval_context are arrays
example[snakeCaseField] = [example[snakeCaseField]];
}
});
}
// Special handling for contextual scorers
if (['contextual_relevancy', 'contextual_precision', 'contextual_recall', 'faithfulness', 'hallucination'].includes(scorer.score_type)) {
// For contextual scorers, ensure context is always an array
if (!example.context) {
example.context = [];
}
else if (!Array.isArray(example.context)) {
// If context is provided but not as an array, convert it to an array
example.context = [example.context];
}
// For contextual relevancy, also ensure retrieval_context is an array
if (scorer.score_type === 'contextual_relevancy') {
if (!example.retrieval_context) {
example.retrieval_context = example.context || [];
}
else if (!Array.isArray(example.retrieval_context)) {
example.retrieval_context = [example.retrieval_context];
}
}
}
});
}
});
}
const response = yield axios.post(JUDGMENT_EVAL_API_URL, payload, {
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${evaluationRun.judgmentApiKey}`,
'X-Organization-Id': evaluationRun.organizationId
}
});
return response.data;
}
catch (error) {
if (axios.isAxiosError(error) && error.response) {
const errorMessage = ((_a = error.response.data) === null || _a === void 0 ? void 0 : _a.detail) || JSON.stringify(error.response.data) || 'An unknown error occurred.';
loggerError(`Error: ${errorMessage}`);
throw new JudgmentAPIError(errorMessage);
}
else {
loggerError(`Error: ${error}`);
throw new JudgmentAPIError(`An error occurred while executing the Judgment API request: ${error}`);
}
}
});
}
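// Usage sketch (hedged): executeApiEval returns the raw API payload; callers normalize it into
// ScoringResult objects themselves (see runEval below).
//
//   const raw = await executeApiEval(evaluationRun); // evaluationRun assumed, as above
//   const rows = raw && typeof raw === 'object' && 'results' in raw ? raw.results : raw;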
/**
* Checks if an evaluation run name already exists for a given project
*/
export function checkEvalRunNameExists(evalName, projectName, judgmentApiKey, organizationId) {
return __awaiter(this, void 0, void 0, function* () {
try {
const response = yield axios.post(`${ROOT_API}/eval-run-name-exists/`, {
eval_name: evalName,
project_name: projectName,
judgment_api_key: judgmentApiKey,
}, {
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${judgmentApiKey}`,
'X-Organization-Id': organizationId
}
});
// Check if the evaluation run name already exists
if (response.status === 409) {
throw new JudgmentAPIError(`Evaluation run name '${evalName}' already exists for project '${projectName}'.`);
}
// Check if the response status code is not 2XX
if (!response.status.toString().startsWith('2')) {
const responseData = response.data;
const errorMessage = responseData.detail || 'An unknown error occurred.';
loggerError(`Error checking eval run name: ${errorMessage}`);
throw new JudgmentAPIError(errorMessage);
}
}
catch (error) {
if (axios.isAxiosError(error) && error.response) {
if (error.response.status === 409) {
throw new JudgmentAPIError(`Evaluation run name '${evalName}' already exists for project '${projectName}'.`);
}
}
// For connection errors or other issues, log but continue
loggerError(`Failed to check if eval run name exists: ${(error === null || error === void 0 ? void 0 : error.message) || String(error)}`);
// Don't throw an error here, just log it and continue
}
});
}
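// Usage sketch: throws JudgmentAPIError if the name is already taken, but only logs and
// continues on connection errors. Argument values are illustrative.
//
//   await checkEvalRunNameExists('my-eval', 'my-project', apiKey, organizationId);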
// Track whether a URL has been printed
export let hasLoggedUrl = false;
/**
* Logs evaluation results to the Judgment API database.
* @param results The results to log
* @param projectName The project name
* @param evalName The evaluation run name
* @param apiKey The API key for the Judgment API
* @param organizationId The organization ID
* @returns A URL to view the results in the Judgment UI
*/
export function logEvaluationResults(results_1, projectName_1, evalName_1) {
return __awaiter(this, arguments, void 0, function* (results, projectName, evalName, apiKey = '', organizationId) {
var _a;
try {
const response = yield axios.post(JUDGMENT_EVAL_LOG_API_URL, {
results: results.map(result => result.toJSON()),
project_name: projectName,
eval_name: evalName,
run: {
project_name: projectName,
eval_name: evalName,
examples: results.map(result => result.dataObject),
scorers: results.flatMap(result => result.scorersData ? result.scorersData.map(scorer => ({
name: scorer.name,
threshold: scorer.threshold,
score_type: scorer.name.toLowerCase().replace(/ /g, '_') // convert display name to snake_case (all spaces)
})) : []),
model: "gpt-3.5-turbo", // Default model
log_results: true,
judgment_api_key: apiKey,
organization_id: organizationId,
append: false
}
}, {
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${apiKey}`,
'X-Organization-Id': organizationId
}
});
if (response.status < 200 || response.status >= 300) {
const responseData = response.data;
const errorMessage = (responseData === null || responseData === void 0 ? void 0 : responseData.detail) || 'An unknown error occurred.';
loggerError(`Error ${response.status}: ${errorMessage}`);
throw new JudgmentAPIError(errorMessage);
}
if (response.data && response.data.ui_results_url) {
const url = response.data.ui_results_url;
const prettyStr = `\n \n🚀 You can view your evaluation results here: ${url}\n`;
console.log(prettyStr);
hasLoggedUrl = true;
return url;
}
return '';
}
catch (error) {
if (axios.isAxiosError(error) && error.response) {
const errorMessage = ((_a = error.response.data) === null || _a === void 0 ? void 0 : _a.detail) || JSON.stringify(error.response.data) || 'An unknown error occurred.';
loggerError(`Error: ${errorMessage}`);
throw new JudgmentAPIError(errorMessage);
}
else {
loggerError(`Error: ${error}`);
throw new JudgmentAPIError(`An error occurred while logging evaluation results: ${error}`);
}
}
});
}
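// Usage sketch (assumes `checkedResults` is an array of ScoringResult, e.g. from runEval):
//
//   const url = await logEvaluationResults(checkedResults, 'my-project', 'my-eval', apiKey, organizationId);
//   if (url) console.log(`View results at ${url}`);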
/**
 * When an evaluation run uses both Judgment API scorers and local scorers, each produces its own
 * set of results for every example. This function merges the API and local results,
 * grouped by example.
 */
export function mergeResults(apiResults, localResults) {
// No merge required
if (!localResults.length && apiResults.length) {
return apiResults;
}
if (!apiResults.length && localResults.length) {
return localResults;
}
if (apiResults.length !== localResults.length) {
// Results should be of same length because each ScoringResult is a 1-1 mapping to an Example
throw new Error(`The number of API and local results do not match: ${apiResults.length} vs ${localResults.length}`);
}
// Each ScoringResult in api and local have all the same fields besides `scorersData`
for (let i = 0; i < apiResults.length; i++) {
const apiResult = apiResults[i];
const localResult = localResults[i];
if (!apiResult.dataObject || !localResult.dataObject) {
throw new Error('Data object is null in one of the results.');
}
// Verify the results are aligned
if (apiResult.dataObject.input !== localResult.dataObject.input ||
apiResult.dataObject.actualOutput !== localResult.dataObject.actualOutput ||
apiResult.dataObject.expectedOutput !== localResult.dataObject.expectedOutput) {
throw new Error('The API and local results are not aligned.');
}
// Merge ScorerData from the API and local scorers together
const apiScorerData = apiResult.scorersData;
const localScorerData = localResult.scorersData;
if (!apiScorerData && localScorerData) {
apiResult.scorersData = localScorerData;
}
else if (apiScorerData && localScorerData) {
apiResult.scorersData = [...apiScorerData, ...localScorerData];
}
}
return apiResults;
}
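// Usage sketch: both arrays must be parallel (one ScoringResult per Example, in the same order);
// the variable names are illustrative.
//
//   const merged = mergeResults(apiResults, localResults);
//   // merged[i].scorersData now contains both the API and local ScorerData for example i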
/**
* Checks if any ScoringResult objects are missing scorersData
*/
export function checkMissingScorerData(results) {
var _a;
for (let i = 0; i < results.length; i++) {
if (!results[i].scorersData || ((_a = results[i].scorersData) === null || _a === void 0 ? void 0 : _a.length) === 0) {
loggerError(`Scorer data is missing for example ${i}. ` +
'This is usually caused when the example does not contain ' +
'the fields required by the scorer. ' +
'Check that your example contains the fields required by the scorers.');
}
}
return results;
}
/**
 * Checks that each example contains the fields required by each scorer, logging a warning for any missing fields
 */
export function checkExamples(examples, scorers) {
for (const scorer of scorers) {
for (const example of examples) {
// Check for required fields based on scorer type
switch (scorer.scoreType) {
case 'answer_correctness':
case 'answer_relevancy':
if (!example.expectedOutput) {
loggerWarn(`Scorer ${scorer.scoreType} requires expectedOutput field`);
}
break;
case 'contextual_precision':
case 'contextual_recall':
case 'contextual_relevancy':
if (!example.context || example.context.length === 0) {
loggerWarn(`Scorer ${scorer.scoreType} requires context field`);
}
break;
case 'execution_order':
if (!example.expectedTools || example.expectedTools.length === 0) {
loggerWarn(`Scorer ${scorer.scoreType} requires expectedTools field`);
}
break;
// Add more checks for other scorer types as needed
}
}
}
}
/**
* Executes an evaluation of Examples using one or more Scorers
*/
export function runEval(evaluationRun_1) {
return __awaiter(this, arguments, void 0, function* (evaluationRun, override = false, ignoreErrors = true, asyncExecution = false) {
// Check if the evaluation run name already exists
// This prevents accidentally overwriting existing evaluation results
if (!override && evaluationRun.logResults) {
yield checkEvalRunNameExists(evaluationRun.evalName || '', evaluationRun.projectName || '', evaluationRun.judgmentApiKey || '', evaluationRun.organizationId || '');
}
// --- Set example IDs and timestamps if not already set ---
// This is important for tracking and debugging purposes
loggerLog("Initializing examples with IDs and timestamps");
evaluationRun.examples.forEach((example, idx) => {
example.exampleIndex = idx; // Set numeric index
example.timestamp = new Date().toISOString().replace(/[-:]/g, '').split('.')[0];
loggerLog(`Initialized example ${example.exampleId} (index: ${example.exampleIndex})`);
loggerLog(`Input: ${example.input}`);
loggerLog(`Actual output: ${example.actualOutput || ''}`);
if (example.expectedOutput) {
loggerLog(`Expected output: ${example.expectedOutput}`);
}
if (example.context) {
loggerLog(`Context: ${example.context}`);
}
});
loggerLog(`Starting evaluation run with ${evaluationRun.examples.length} examples`);
// --- Split scorers into API and local ---
// API scorers run on the Judgment API server
// Local scorers run in this process
loggerLog("Grouping scorers by type");
const apiScorers = [];
const localScorers = [];
evaluationRun.scorers.forEach(scorer => {
if (scorer instanceof APIJudgmentScorer) {
apiScorers.push(scorer);
loggerLog(`Added judgment scorer: ${scorer.constructor.name}`);
}
else {
localScorers.push(scorer);
loggerLog(`Added local scorer: ${scorer.constructor.name}`);
}
});
loggerLog(`Found ${apiScorers.length} judgment scorers and ${localScorers.length} local scorers`);
let apiResults = [];
let localResults = [];
// --- Handle async execution ---
// This allows evaluations to run in the background without blocking
// Useful for large-scale evaluations that might take a long time
if (asyncExecution) {
checkExamples(evaluationRun.examples, evaluationRun.scorers);
loggerLog("Starting async evaluation");
// Add the evaluation to the RabbitMQ queue for async processing
// The server will pick it up and process it in the background
try {
yield axios.post(JUDGMENT_ADD_TO_RUN_EVAL_QUEUE_API_URL, evaluationRun.toJSON(), {
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${evaluationRun.judgmentApiKey}`,
'X-Organization-Id': evaluationRun.organizationId
}
});
loggerInfo("Successfully added evaluation to queue");
}
catch (error) {
// Log the error but don't throw it - this matches Python SDK behavior
// Combine error message into the first argument
loggerError(`Error adding evaluation to queue: ${error instanceof Error ? error.message : String(error)}`);
// Always print success message to match Python SDK behavior
// This is important because the Python SDK always prints this message
// even when there's an error connecting to RabbitMQ
loggerInfo("Successfully added evaluation to queue (Note: Previous error occurred)");
}
// Return empty results for async execution
// The results will be available later via the UI or API
return [];
}
else {
// --- Execute API scorers ---
// These run on the Judgment API server
if (apiScorers.length > 0) {
try {
loggerLog('Executing API evaluation...');
const apiResponseData = yield executeApiEval(evaluationRun);
// Create ScoringResult objects from API response
// Check if the response has a results field (matching Python SDK format)
const resultsData = apiResponseData && typeof apiResponseData === 'object' && 'results' in apiResponseData
? apiResponseData.results
: apiResponseData;
apiResults = resultsData.map((result) => {
// Defensive: Ensure data_object is present and well-formed
let dataObject = result.data_object;
let dataObjectError = undefined;
if (!dataObject || typeof dataObject !== 'object') {
dataObjectError = 'Missing or malformed data_object in API result.';
// Try to reconstruct minimal Example if possible
dataObject = {
input: result.input || '',
actualOutput: result.actual_output || '',
expectedOutput: result.expected_output || '',
name: result.name || 'example',
exampleId: result.example_id || '',
exampleIndex: result.example_index || 0,
timestamp: result.timestamp || new Date().toISOString(),
traceId: result.trace_id || null,
};
}
// Defensive: If still missing required fields, fill with defaults
dataObject.input = dataObject.input || '';
dataObject.name = dataObject.name || 'example';
dataObject.exampleId = dataObject.exampleId || '';
dataObject.exampleIndex = dataObject.exampleIndex || 0;
dataObject.timestamp = dataObject.timestamp || new Date().toISOString();
dataObject.traceId = dataObject.traceId || null;
// Create Example instance for ScoringResult
const exampleInstance = new Example(dataObject);
// Compose error message if needed
const errorMsg = result.error && dataObjectError
? `${result.error} | ${dataObjectError}`
: result.error || dataObjectError;
return new ScoringResult({
dataObject: exampleInstance,
scorersData: result.scorers_data,
error: errorMsg
});
});
loggerLog(`API evaluation complete with ${apiResults.length} results`);
}
catch (error) {
loggerError(`Error executing API evaluation: ${error}`);
if (!ignoreErrors) {
throw error;
}
}
}
// --- Execute local scorers ---
// These run in this process
if (localScorers.length > 0) {
loggerLog('Starting local evaluation');
try {
// Process each example with each local scorer
localResults = yield Promise.all(evaluationRun.examples.map((example) => __awaiter(this, void 0, void 0, function* () {
const scorersData = [];
// Run each local scorer on the example
for (const scorer of localScorers) {
try {
loggerLog(`Running local scorer ${scorer.type} on example ${example.exampleId}`);
const scorerData = yield scorer.scoreExample(example);
scorersData.push(scorerData);
loggerLog(`Scorer ${scorer.type} result: score=${scorerData.score}, success=${scorerData.success}`);
}
catch (scorerError) {
loggerError(`Error running scorer ${scorer.type} on example ${example.exampleId}: ${(scorerError === null || scorerError === void 0 ? void 0 : scorerError.message) || String(scorerError)}`);
// Add failed scorer data
scorersData.push({
name: scorer.type,
threshold: scorer.threshold,
success: false,
score: 0,
reason: null,
strict_mode: null,
evaluation_model: null,
error: (scorerError === null || scorerError === void 0 ? void 0 : scorerError.message) || String(scorerError),
evaluation_cost: null,
verbose_logs: null,
additional_metadata: scorer.additional_metadata || {}
});
}
}
// Create a ScoringResult with all scorer data for this example
return new ScoringResult({
dataObject: example,
scorersData: scorersData,
error: undefined
});
})));
loggerLog(`Local evaluation complete with ${localResults.length} results`);
}
catch (error) {
loggerError(`Error executing local evaluation: ${(error === null || error === void 0 ? void 0 : error.message) || String(error)}`);
if (!ignoreErrors) {
throw error;
}
else {
// Create empty results with errors if ignoring errors
localResults = evaluationRun.examples.map(example => {
return new ScoringResult({
dataObject: example,
scorersData: [],
error: error === null || error === void 0 ? void 0 : error.message
});
});
}
}
}
// --- Merge results from API and local scorers ---
// This combines the results from both types of scorers
// Aligns with the Python SDK's result-merging behavior
loggerLog('Merging API and local results');
const mergedResults = mergeResults(apiResults, localResults);
// Check for missing scorer data
// This helps identify examples that couldn't be evaluated
const checkedResults = checkMissingScorerData(mergedResults);
loggerLog(`Successfully merged ${checkedResults.length} results`);
// --- Log results to Judgment API if requested ---
// This saves the results to the database for later viewing
if (evaluationRun.logResults) {
try {
const url = yield logEvaluationResults(checkedResults, evaluationRun.projectName || '', evaluationRun.evalName || '', evaluationRun.judgmentApiKey || '', evaluationRun.organizationId || '');
loggerLog(`Results logged to Judgment API: ${url}`);
}
catch (error) {
loggerError(`Error logging evaluation results: ${(error === null || error === void 0 ? void 0 : error.message) || String(error)}`);
if (!ignoreErrors) {
throw error;
}
}
}
// --- Check for examples with no scorer data ---
// This helps identify examples that couldn't be evaluated
for (let i = 0; i < checkedResults.length; i++) {
const result = checkedResults[i];
if (!result.scorersData || result.scorersData.length === 0) {
loggerLog(`None of the scorers could be executed on example ${i}. This is usually because the Example is missing the fields needed by the scorers. Try checking that the Example has the necessary fields for your scorers.`);
}
}
return checkedResults;
}
});
}
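// End-to-end usage sketch (hedged): the Example fields and the run object shown here are
// illustrative; a real run is normally an evaluation-run instance built with the SDK's
// run and scorer classes, which are not reproduced in this file.
//
//   const run = {
//     projectName: 'my-project',
//     evalName: 'my-eval',
//     examples: [new Example({ input: 'What is 2 + 2?', actualOutput: '4', expectedOutput: '4' })],
//     scorers: [/* APIJudgmentScorer or local scorer instances */],
//     judgmentApiKey: apiKey,
//     organizationId: organizationId,
//     logResults: true,
//     toJSON() { return { /* serialized run, as expected by the Judgment API */ }; }
//   };
//   const results = await runEval(run);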
/**
 * Collects all failed scorers from the scoring results
 * and throws an Error summarizing any failed test cases
 */
export function assertTest(scoringResults) {
var _a;
const failedTests = [];
for (const result of scoringResults) {
if (result.error) {
failedTests.push(`Error in result: ${result.error}`);
continue;
}
if (!result.scorersData || ((_a = result.scorersData) === null || _a === void 0 ? void 0 : _a.length) === 0) {
failedTests.push('No scorer data found in result');
continue;
}
for (const scorerData of result.scorersData) {
if (!scorerData.success) {
failedTests.push(`Test failed: ${scorerData.name} with score ${scorerData.score} ` +
`(threshold: ${scorerData.threshold})`);
}
}
}
if (failedTests.length > 0) {
throw new Error(`Test assertion failed:\n${failedTests.join('\n')}`);
}
}
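// Usage sketch: typically called right after runEval inside a test.
//
//   const results = await runEval(run); // `run` as in the sketch above
//   assertTest(results); // throws with a summary of every failed scorer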
//# sourceMappingURL=run-evaluation.js.map