judgeval

Version:

Judgment SDK for TypeScript/JavaScript

79 lines (78 loc) • 3.68 kB

TypeScript

import { Example } from './data/example.js';
import { EvaluationRun } from './evaluation-run.js';
import { ScoringResult } from './data/result.js';
import { APIJudgmentScorer } from './scorers/base-scorer.js';

/**
 * Custom error for Judgment API errors.
 *
 * Extends the built-in `Error` and is constructed from a single message string.
 */
export declare class JudgmentAPIError extends Error {
    /**
     * @param message Human-readable description of the failure
     */
    constructor(message: string);
}

/**
 * Validates an API response to ensure it has the expected format.
 *
 * @param response The response to validate. Typed `any`, so the expected
 *   shape is not visible from this declaration.
 * @throws JudgmentAPIError if the response is invalid
 */
export declare function validateApiResponse(response: any): void;

/**
 * Sends an evaluation run to the RabbitMQ evaluation queue.
 *
 * @param evaluationRun The evaluation run to send
 * @returns A promise typed `Promise<any>`; the shape of the resolved value is
 *   not specified by this declaration.
 */
export declare function sendToRabbitMQ(evaluationRun: EvaluationRun): Promise<any>;

/**
 * Checks the status of an async evaluation.
 *
 * @param evaluationRun The evaluation run to check
 * @returns The status of the evaluation (typed `any`; shape not specified here)
 */
export declare function checkEvaluationStatus(evaluationRun: EvaluationRun): Promise<any>;

/**
 * Polls the status of an async evaluation until it's complete.
 *
 * @param evaluationRun The evaluation run to poll
 * @param intervalMs The interval between polls in milliseconds. Optional; the
 *   default value is not visible in this declaration file.
 * @param maxAttempts The maximum number of polling attempts. Optional; the
 *   default value is not visible in this declaration file.
 * @param onProgress Optional callback for progress updates; it receives a
 *   status value typed `any`.
 * @returns The evaluation results
 */
export declare function pollEvaluationStatus(
    evaluationRun: EvaluationRun,
    intervalMs?: number,
    maxAttempts?: number,
    onProgress?: (status: any) => void
): Promise<ScoringResult[]>;

/**
 * Executes an evaluation of a list of Examples using one or more
 * JudgmentScorers via the Judgment API.
 *
 * @param evaluationRun The evaluation run object containing the examples,
 *   scorers, and metadata
 * @returns The results of the evaluation (an array typed `any[]`; element
 *   shape not specified here)
 */
export declare function executeApiEval(evaluationRun: EvaluationRun): Promise<any[]>;

/**
 * Checks if an evaluation run name already exists for a given project.
 *
 * @param evalName The evaluation run name to look up
 * @param projectName The project the run name is checked against
 * @param judgmentApiKey The API key for the Judgment API
 * @param organizationId The organization ID
 * @returns A promise that resolves with no value.
 *   NOTE(review): presumably the promise rejects when the name is already
 *   taken, since nothing is returned on success; confirm against the
 *   implementation.
 */
export declare function checkEvalRunNameExists(
    evalName: string,
    projectName: string,
    judgmentApiKey: string,
    organizationId: string
): Promise<void>;

/**
 * Exported mutable boolean flag.
 *
 * NOTE(review): presumably records whether the results URL has already been
 * logged; confirm against the implementation, which is not visible here.
 */
export declare let hasLoggedUrl: boolean;

/**
 * Logs evaluation results to the Judgment API database.
 *
 * @param results The results to log
 * @param projectName The project name
 * @param evalName The evaluation run name
 * @param apiKey The API key for the Judgment API. The type admits `undefined`;
 *   how a missing key is handled is not visible in this declaration file.
 * @param organizationId The organization ID
 * @returns A URL to view the results in the Judgment UI
 */
export declare function logEvaluationResults(
    results: ScoringResult[],
    projectName: string,
    evalName: string,
    apiKey: string | undefined,
    organizationId: string
): Promise<string>;

/**
 * When executing scorers that come from both the Judgment API and local
 * scorers, we're left with results for each type of scorer. This function
 * merges the results from the API and local evaluations, grouped by example.
 *
 * @param apiResults Results produced by the Judgment API scorers
 * @param localResults Results produced by the local scorers
 * @returns The merged results
 */
export declare function mergeResults(apiResults: ScoringResult[], localResults: ScoringResult[]): ScoringResult[];

/**
 * Checks if any ScoringResult objects are missing scorersData.
 *
 * @param results The results to inspect
 * @returns An array of ScoringResult objects.
 *   NOTE(review): whether this is the input array, a filtered copy, or only
 *   the affected entries is not visible here; confirm against the
 *   implementation.
 */
export declare function checkMissingScorerData(results: ScoringResult[]): ScoringResult[];

/**
 * Checks if the example contains the necessary parameters for the scorer.
 *
 * @param examples The examples to check
 * @param scorers The API scorers whose required parameters are checked
 */
export declare function checkExamples(examples: Example[], scorers: APIJudgmentScorer[]): void;

/**
 * Executes an evaluation of Examples using one or more Scorers.
 *
 * @param evaluationRun The evaluation run to execute
 * @param override Optional flag.
 *   NOTE(review): presumably allows reusing an existing run name; confirm.
 * @param ignoreErrors Optional flag.
 *   NOTE(review): presumably suppresses scorer errors; confirm.
 * @param asyncExecution Optional flag.
 *   NOTE(review): presumably queues the run instead of waiting for results;
 *   confirm.
 * @returns The scoring results of the evaluation
 */
export declare function runEval(
    evaluationRun: EvaluationRun,
    override?: boolean,
    ignoreErrors?: boolean,
    asyncExecution?: boolean
): Promise<ScoringResult[]>;

/**
 * Collects all failed scorers from the scoring results.
 * Raises exceptions for any failed test cases.
 *
 * @param scoringResults The scoring results to check
 */
export declare function assertTest(scoringResults: ScoringResult[]): void;