UNPKG

judgeval

Version:

Judgment SDK for TypeScript/JavaScript

293 lines 11.4 kB
"use strict"; var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; Object.defineProperty(exports, "__esModule", { value: true }); exports.ScorerWrapper = exports.JudgevalScorer = exports.APIJudgmentScorer = void 0; const constants_js_1 = require("../constants.js"); const token_costs_js_1 = require("../common/token-costs.js"); const logger_js_1 = require("../common/logger.js"); /** * Base class for API judgment scorers */ class APIJudgmentScorer { get scoreType() { return this.type; } // For backward compatibility constructor(type, threshold, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) { this.requiredFields = ['input', 'actual_output']; // Default required fields this.type = type; this.threshold = threshold; this.additional_metadata = additional_metadata; this.strict_mode = strict_mode; this.async_mode = async_mode; this.verbose_mode = verbose_mode; this.include_reason = include_reason; } /** * Check if the score meets the threshold */ successCheck() { if (this.score === undefined) { return false; } return this.score >= this.threshold; } /** * Validate that the threshold is within the allowed range */ validateThreshold() { // Check if this is an unbounded scorer const isUnbounded = Array.from(constants_js_1.UNBOUNDED_SCORERS).some(scorer => scorer.toLowerCase() === this.type.toLowerCase()); if (isUnbounded) { if (this.threshold < 0) { throw new Error(`Threshold for ${this.type} must be greater than or equal to 0, got: ${this.threshold}`); } } else { if (this.threshold < 0 || this.threshold > 1) { throw new Error(`Threshold for ${this.type} must be between 0 and 1, got: ${this.threshold}`); } } } /** * Convert the scorer to a plain object */ toJSON() { const result = { score_type: this.type, threshold: this.threshold, score: this.score, score_breakdown: this.score_breakdown, additional_metadata: this.additional_metadata, strict_mode: this.strict_mode, async_mode: this.async_mode, verbose_mode: this.verbose_mode, include_reason: this.include_reason, }; return result; } a_score_example(example) { return __awaiter(this, void 0, void 0, function* () { throw new Error('API scorers are evaluated on the server side'); }); } } exports.APIJudgmentScorer = APIJudgmentScorer; /** * Base class for local judgment scorers */ class JudgevalScorer { constructor(type, threshold, additional_metadata, include_reason = true, async_mode = true, strict_mode = false, verbose_mode = true) { this.requiredFields = ['input', 'actualOutput']; // Default required fields this.type = type; this.scoreType = type; // For backward compatibility this.threshold = strict_mode ? 1.0 : threshold; this.strict_mode = strict_mode; this.async_mode = async_mode; this.verbose_mode = verbose_mode; this.include_reason = include_reason; this.additional_metadata = additional_metadata; this.validateThreshold(); } /** * Check if the score meets the threshold */ successCheck() { if (this.error !== undefined) { return false; } return this.score !== undefined && this.score >= this.threshold; } /** * Internal method to check success * This is equivalent to Python's _success_check method */ _successCheck() { return this.successCheck(); } /** * Validate that the threshold is within the allowed range */ validateThreshold() { // Check if this is an unbounded scorer const isUnbounded = Array.from(constants_js_1.UNBOUNDED_SCORERS).some(scorer => scorer.toLowerCase() === this.type.toLowerCase()); if (isUnbounded) { if (this.threshold < 0) { throw new Error(`Threshold for ${this.type} must be greater than or equal to 0, got: ${this.threshold}`); } } else { if (this.threshold < 0 || this.threshold > 1) { throw new Error(`Threshold for ${this.type} must be between 0 and 1, got: ${this.threshold}`); } } } /** * Convert the scorer to a plain object */ toJSON() { return { score_type: this.type, threshold: this.threshold, score: this.score, score_breakdown: this.score_breakdown, reason: this.reason, success: this.success, evaluation_model: this.evaluation_model, strict_mode: this.strict_mode, async_mode: this.async_mode, verbose_mode: this.verbose_mode, include_reason: this.include_reason, error: this.error, evaluation_cost: this.evaluation_cost, verbose_logs: this.verbose_logs, additional_metadata: this.additional_metadata, }; } /** * Check if example has required parameters * This is equivalent to Python's check_example_params function */ _checkExampleParams(example) { for (const param of this.requiredFields) { if (param === 'input' && !example.input) { throw new Error(`Example is missing required parameter: input`); } else if (param === 'actualOutput' && !example.actualOutput) { throw new Error(`Example is missing required parameter: actualOutput`); } else if (param === 'expectedOutput' && !example.expectedOutput) { throw new Error(`Example is missing required parameter: expectedOutput`); } else if (param === 'context' && (!example.context || !Array.isArray(example.context))) { throw new Error(`Example is missing required parameter: context (must be an array)`); } else if (param === 'retrievalContext' && (!example.retrievalContext || !Array.isArray(example.retrievalContext))) { throw new Error(`Example is missing required parameter: retrievalContext (must be an array)`); } } } /** * Get the name of the scorer * This is equivalent to Python's __name__ property */ get name() { return this.type; } /** * Calculate token costs for model usage * This is a utility method that all scorers can use * * @param model Model name (e.g., 'gpt-3.5-turbo') * @param promptTokens Number of prompt tokens * @param completionTokens Number of completion tokens * @returns Total cost in USD */ _calculateTokenCosts(model, promptTokens, completionTokens) { try { const costResult = (0, token_costs_js_1.calculateTokenCosts)(model, promptTokens, completionTokens); return costResult.totalCostUsd; } catch (e) { (0, logger_js_1.warn)(`Error calculating token costs: ${e}`); return 0; } } } exports.JudgevalScorer = JudgevalScorer; /** * Wrapper for scorers to allow dynamic loading of implementations */ class ScorerWrapper { constructor(scorer) { this.scorer = scorer; this.type = scorer.type; this.scoreType = scorer.scoreType || scorer.score_type; // For backward compatibility this.threshold = scorer.threshold; this.score = scorer.score; this.score_breakdown = scorer.score_breakdown; this.reason = scorer.reason; this.success = scorer.success; this.evaluation_model = scorer.evaluation_model; this.strict_mode = scorer.strict_mode; this.async_mode = scorer.async_mode; this.verbose_mode = scorer.verbose_mode; this.include_reason = scorer.include_reason; this.error = scorer.error; this.evaluation_cost = scorer.evaluation_cost; this.verbose_logs = scorer.verbose_logs; this.additional_metadata = scorer.additional_metadata; this.requiredFields = scorer.requiredFields; } /** * Check if the score meets the threshold */ successCheck() { if (this.score === undefined) { return false; } return this.score >= this.threshold; } /** * Load the appropriate implementation based on the use_judgment flag */ loadImplementation(useJudgment = true) { // This would be implemented based on the specific scorer types // For now, we'll just return the scorer as is if (useJudgment) { // Return API implementation return this.scorer; } else { // Return local implementation return this.scorer; } } /** * Validate that the threshold is within the allowed range */ validateThreshold() { // Check if this is an unbounded scorer const isUnbounded = Array.from(constants_js_1.UNBOUNDED_SCORERS).some(scorer => scorer.toLowerCase() === this.type.toLowerCase()); if (isUnbounded) { if (this.threshold < 0) { throw new Error(`Threshold for ${this.type} must be greater than or equal to 0, got: ${this.threshold}`); } } else { if (this.threshold < 0 || this.threshold > 1) { throw new Error(`Threshold for ${this.type} must be between 0 and 1, got: ${this.threshold}`); } } } /** * Convert the scorer to a plain object */ toJSON() { return { score_type: this.type, threshold: this.threshold, score: this.score, score_breakdown: this.score_breakdown, reason: this.reason, success: this.success, evaluation_model: this.evaluation_model, strict_mode: this.strict_mode, async_mode: this.async_mode, verbose_mode: this.verbose_mode, include_reason: this.include_reason, error: this.error, evaluation_cost: this.evaluation_cost, verbose_logs: this.verbose_logs, additional_metadata: this.additional_metadata, requiredFields: this.requiredFields, }; } } exports.ScorerWrapper = ScorerWrapper; //# sourceMappingURL=base-scorer.js.map