judgeval
Version:
Judgment SDK for TypeScript/JavaScript
79 lines (78 loc) • 3.68 kB
TypeScript
import { Example } from './data/example.js';
import { EvaluationRun } from './evaluation-run.js';
import { ScoringResult } from './data/result.js';
import { APIJudgmentScorer } from './scorers/base-scorer.js';
/**
* Custom error type for failures reported by, or while talking to, the Judgment API.
* Extends the built-in `Error`, so it can be caught with `instanceof JudgmentAPIError`.
*/
export declare class JudgmentAPIError extends Error {
/**
* @param message Human-readable description of the failure.
* NOTE(review): presumably forwarded to `Error` as `.message`; the implementation is not visible in this declaration.
*/
constructor(message: string);
}
/**
* Validates an API response to ensure it has the expected format.
* Returns nothing on success; signals an invalid response by throwing.
* @param response The response value to validate. Typed as `any`, so no compile-time checking is applied to it.
* NOTE(review): what the "expected format" is cannot be determined from this declaration; see the implementation.
* @throws JudgmentAPIError if the response is invalid
*/
export declare function validateApiResponse(response: any): void;
/**
* Sends an evaluation run to the RabbitMQ evaluation queue.
* @param evaluationRun The evaluation run to enqueue
* @returns A promise resolving to an untyped (`any`) value.
* NOTE(review): the shape of the resolved value is not visible here; presumably the queue/API acknowledgement (confirm against the implementation).
*/
export declare function sendToRabbitMQ(evaluationRun: EvaluationRun): Promise<any>;
/**
* Checks the status of an async evaluation.
* @param evaluationRun The evaluation run to check
* @returns A promise resolving to the status of the evaluation.
* NOTE(review): the status payload is typed `any`; its fields are not visible in this declaration.
*/
export declare function checkEvaluationStatus(evaluationRun: EvaluationRun): Promise<any>;
/**
* Polls the status of an async evaluation until it's complete.
* @param evaluationRun The evaluation run to poll
* @param intervalMs Optional interval between polls, in milliseconds.
* NOTE(review): the default used when omitted is not visible in this declaration.
* @param maxAttempts Optional maximum number of polling attempts.
* NOTE(review): the default, and what happens once the limit is reached, are not visible here; confirm against the implementation.
* @param onProgress Optional callback for progress updates; receives an untyped (`any`) status value
* @returns A promise resolving to the evaluation results
*/
export declare function pollEvaluationStatus(evaluationRun: EvaluationRun, intervalMs?: number, maxAttempts?: number, onProgress?: (status: any) => void): Promise<ScoringResult[]>;
/**
* Executes an evaluation of a list of Examples using one or more JudgmentScorers via the Judgment API.
* @param evaluationRun The evaluation run object containing the examples, scorers, and metadata
* @returns A promise resolving to the results of the evaluation.
* NOTE(review): elements are typed `any`, not `ScoringResult`; presumably raw API payloads that callers convert (confirm).
*/
export declare function executeApiEval(evaluationRun: EvaluationRun): Promise<any[]>;
/**
* Checks if an evaluation run name already exists for a given project.
* @param evalName The evaluation run name to look up
* @param projectName The project the run name is checked against
* @param judgmentApiKey The API key for the Judgment API
* @param organizationId The organization ID
* @returns A promise that resolves with no value.
* NOTE(review): since nothing is returned, an existing name is presumably reported by rejecting/throwing; confirm against the implementation.
*/
export declare function checkEvalRunNameExists(evalName: string, projectName: string, judgmentApiKey: string, organizationId: string): Promise<void>;
/**
* Exported mutable module-level flag (declared with `let`, so the defining module can reassign it).
* NOTE(review): presumably records whether the results URL has already been logged, to avoid repeating it; confirm against the implementation.
*/
export declare let hasLoggedUrl: boolean;
/**
* Logs evaluation results to the Judgment API database.
* @param results The results to log
* @param projectName The project name
* @param evalName The evaluation run name
* @param apiKey The API key for the Judgment API. The type admits `undefined`, but the argument itself is required.
* NOTE(review): behavior when `undefined` is passed is not visible in this declaration.
* @param organizationId The organization ID
* @returns A promise resolving to a URL to view the results in the Judgment UI
*/
export declare function logEvaluationResults(results: ScoringResult[], projectName: string, evalName: string, apiKey: string | undefined, organizationId: string): Promise<string>;
/**
* When executing scorers that come from both the Judgment API and local scorers, we're left with
* results for each type of scorer. This function merges the results from the API and local evaluations,
* grouped by example.
* @param apiResults Results produced by scorers run via the Judgment API
* @param localResults Results produced by locally executed scorers
* @returns The merged results.
* NOTE(review): how results are matched per example, and how mismatched input lengths are handled, is not visible in this declaration.
*/
export declare function mergeResults(apiResults: ScoringResult[], localResults: ScoringResult[]): ScoringResult[];
/**
* Checks if any ScoringResult objects are missing scorersData.
* @param results The scoring results to inspect
* @returns An array of ScoringResult objects.
* NOTE(review): the declaration does not show whether this is the input unchanged, a filtered subset, or only the results lacking scorer data; confirm against the implementation.
*/
export declare function checkMissingScorerData(results: ScoringResult[]): ScoringResult[];
/**
* Checks if the example contains the necessary parameters for the scorer.
* @param examples The examples to check
* @param scorers The API scorers whose required parameters the examples are checked against
* NOTE(review): returns `void`, so a missing parameter is presumably reported by logging or throwing; the declaration does not show which.
*/
export declare function checkExamples(examples: Example[], scorers: APIJudgmentScorer[]): void;
/**
* Executes an evaluation of Examples using one or more Scorers.
* @param evaluationRun The evaluation run to execute
* @param override Optional flag.
* NOTE(review): presumably allows reusing/overwriting an existing evaluation run name; confirm. Default not visible here.
* @param ignoreErrors Optional flag.
* NOTE(review): presumably suppresses failures during evaluation instead of propagating them; confirm. Default not visible here.
* @param asyncExecution Optional flag.
* NOTE(review): presumably submits the run for asynchronous (queued) execution rather than evaluating inline; confirm. Default not visible here.
* @returns A promise resolving to the scoring results
*/
export declare function runEval(evaluationRun: EvaluationRun, override?: boolean, ignoreErrors?: boolean, asyncExecution?: boolean): Promise<ScoringResult[]>;
/**
* Collects all failed scorers from the scoring results.
* Throws for any failed test cases.
* @param scoringResults The scoring results to inspect for failures
* @throws NOTE(review): the concrete error type thrown is not visible in this declaration; confirm against the implementation.
*/
export declare function assertTest(scoringResults: ScoringResult[]): void;