judgeval
Version:
Judgment SDK for TypeScript/JavaScript
293 lines • 11.4 kB
JavaScript
"use strict";
/**
 * Async helper (appears to be the compiler-emitted `__awaiter` shim): runs a
 * generator function to completion and exposes the outcome as a promise.
 *
 * If `this.__awaiter` already exists it is reused; otherwise the function
 * below is used.
 *
 * @param thisArg    `this` value the generator function is applied with.
 * @param _arguments Arguments passed to the generator function (defaults to []).
 * @param P          Promise constructor to use; falls back to the global `Promise`.
 * @param generator  Generator function whose yielded values are awaited.
 * @returns A `P` instance resolved with the generator's final value, or
 *          rejected with any error thrown while stepping it.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Pass through values that are already instances of P; wrap anything else in a resolved P.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with the settled value; a synchronous throw rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Re-throw a rejection inside the generator so its own try/catch can observe it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Finish when the generator is done; otherwise wait on the yielded value and continue.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiate the generator (reusing the `generator` binding for the iterator) and take the first step.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.ScorerWrapper = exports.JudgevalScorer = exports.APIJudgmentScorer = void 0;
const constants_js_1 = require("../constants.js");
const token_costs_js_1 = require("../common/token-costs.js");
const logger_js_1 = require("../common/logger.js");
/**
 * Base class for API judgment scorers.
 *
 * Instances hold scorer configuration plus result fields (`score`,
 * `score_breakdown`) that are left undefined by the constructor. Scoring
 * itself is not performed locally: `a_score_example` always rejects.
 */
class APIJudgmentScorer {
    /**
     * Alias of `type`, kept for backward compatibility.
     */
    get scoreType() {
        return this.type;
    }
    /**
     * Store the scorer configuration. The threshold is NOT validated here;
     * call `validateThreshold()` explicitly when validation is wanted.
     *
     * @param type                Scorer type identifier (compared case-insensitively
     *                            against UNBOUNDED_SCORERS by `validateThreshold`).
     * @param threshold           Minimum score for `successCheck()` to return true.
     * @param additional_metadata Stored as-is and echoed by `toJSON()`.
     * @param strict_mode         Stored as-is (default false).
     * @param async_mode          Stored as-is (default true).
     * @param verbose_mode        Stored as-is (default true).
     * @param include_reason      Stored as-is (default true).
     */
    constructor(type, threshold, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
        // Default required fields (snake_case for this class).
        this.requiredFields = ['input', 'actual_output'];
        this.type = type;
        this.threshold = threshold;
        this.additional_metadata = additional_metadata;
        this.strict_mode = strict_mode;
        this.async_mode = async_mode;
        this.verbose_mode = verbose_mode;
        this.include_reason = include_reason;
    }
    /**
     * Check if the score meets the threshold.
     *
     * @returns {boolean} false while `score` is undefined, otherwise
     *          `score >= threshold`.
     */
    successCheck() {
        if (this.score === undefined) {
            return false;
        }
        return this.score >= this.threshold;
    }
    /**
     * Validate that the threshold is within the allowed range: >= 0 for
     * scorer types listed in UNBOUNDED_SCORERS, otherwise within [0, 1].
     *
     * @throws {Error} If the threshold is NaN or outside the allowed range.
     */
    validateThreshold() {
        // NaN compares false against every bound, so without this guard it
        // would slip through both range checks below.
        if (Number.isNaN(this.threshold)) {
            throw new Error(`Threshold for ${this.type} must be a valid number, got: ${this.threshold}`);
        }
        // Check if this is an unbounded scorer (case-insensitive match on type).
        const isUnbounded = Array.from(constants_js_1.UNBOUNDED_SCORERS).some(scorer => scorer.toLowerCase() === this.type.toLowerCase());
        if (isUnbounded) {
            if (this.threshold < 0) {
                throw new Error(`Threshold for ${this.type} must be greater than or equal to 0, got: ${this.threshold}`);
            }
            return;
        }
        if (this.threshold < 0 || this.threshold > 1) {
            throw new Error(`Threshold for ${this.type} must be between 0 and 1, got: ${this.threshold}`);
        }
    }
    /**
     * Convert the scorer to a plain object.
     *
     * @returns {object} A new object with snake_case keys; `type` is emitted
     *          as `score_type`.
     */
    toJSON() {
        return {
            score_type: this.type,
            threshold: this.threshold,
            score: this.score,
            score_breakdown: this.score_breakdown,
            additional_metadata: this.additional_metadata,
            strict_mode: this.strict_mode,
            async_mode: this.async_mode,
            verbose_mode: this.verbose_mode,
            include_reason: this.include_reason,
        };
    }
    /**
     * Local scoring entry point; not supported for API scorers.
     *
     * @param example Ignored.
     * @returns {Promise<never>} A promise that is always rejected with an Error.
     */
    a_score_example(example) {
        return Promise.reject(new Error('API scorers are evaluated on the server side'));
    }
}
exports.APIJudgmentScorer = APIJudgmentScorer;
/**
 * Base class for local judgment scorers.
 *
 * Holds scorer configuration, validates the threshold at construction time,
 * and provides helpers for example validation and token-cost calculation.
 * Result fields (`score`, `reason`, `success`, `error`, ...) are not set by
 * the constructor and stay undefined until something assigns them.
 */
class JudgevalScorer {
    /**
     * @param type                Scorer type identifier; also exposed as `scoreType` and `name`.
     * @param threshold           Minimum score for success. Ignored when `strict_mode` is truthy.
     * @param additional_metadata Stored as-is and echoed by `toJSON()`.
     * @param include_reason      Stored as-is (default true).
     * @param async_mode          Stored as-is (default true).
     * @param strict_mode         When truthy the threshold is forced to 1.0 (default false).
     * @param verbose_mode        Stored as-is (default true).
     * @throws {Error} If the effective threshold fails `validateThreshold()`.
     */
    constructor(type, threshold, additional_metadata, include_reason = true, async_mode = true, strict_mode = false, verbose_mode = true) {
        // Default required fields (camelCase for this class).
        this.requiredFields = ['input', 'actualOutput'];
        this.type = type;
        this.scoreType = type; // For backward compatibility
        this.threshold = strict_mode ? 1.0 : threshold;
        this.strict_mode = strict_mode;
        this.async_mode = async_mode;
        this.verbose_mode = verbose_mode;
        this.include_reason = include_reason;
        this.additional_metadata = additional_metadata;
        this.validateThreshold();
    }
    /**
     * Check if the score meets the threshold.
     *
     * @returns {boolean} false if `error` is set or `score` is undefined,
     *          otherwise `score >= threshold`.
     */
    successCheck() {
        if (this.error !== undefined) {
            return false;
        }
        return this.score !== undefined && this.score >= this.threshold;
    }
    /**
     * Internal alias of `successCheck()`, mirroring Python's `_success_check`.
     */
    _successCheck() {
        return this.successCheck();
    }
    /**
     * Validate that the threshold is within the allowed range: >= 0 for
     * scorer types listed in UNBOUNDED_SCORERS, otherwise within [0, 1].
     *
     * @throws {Error} If the threshold is NaN or outside the allowed range.
     */
    validateThreshold() {
        // NaN compares false against every bound, so without this guard it
        // would slip through both range checks below.
        if (Number.isNaN(this.threshold)) {
            throw new Error(`Threshold for ${this.type} must be a valid number, got: ${this.threshold}`);
        }
        // Check if this is an unbounded scorer (case-insensitive match on type).
        const isUnbounded = Array.from(constants_js_1.UNBOUNDED_SCORERS).some(scorer => scorer.toLowerCase() === this.type.toLowerCase());
        if (isUnbounded) {
            if (this.threshold < 0) {
                throw new Error(`Threshold for ${this.type} must be greater than or equal to 0, got: ${this.threshold}`);
            }
            return;
        }
        if (this.threshold < 0 || this.threshold > 1) {
            throw new Error(`Threshold for ${this.type} must be between 0 and 1, got: ${this.threshold}`);
        }
    }
    /**
     * Convert the scorer to a plain object.
     *
     * @returns {object} A new object with snake_case keys; `type` is emitted
     *          as `score_type`.
     */
    toJSON() {
        return {
            score_type: this.type,
            threshold: this.threshold,
            score: this.score,
            score_breakdown: this.score_breakdown,
            reason: this.reason,
            success: this.success,
            evaluation_model: this.evaluation_model,
            strict_mode: this.strict_mode,
            async_mode: this.async_mode,
            verbose_mode: this.verbose_mode,
            include_reason: this.include_reason,
            error: this.error,
            evaluation_cost: this.evaluation_cost,
            verbose_logs: this.verbose_logs,
            additional_metadata: this.additional_metadata,
        };
    }
    /**
     * Check that `example` carries every parameter named in `requiredFields`.
     * Mirrors Python's `check_example_params`.
     *
     * `input`, `actualOutput` and `expectedOutput` count as missing only when
     * they are null or undefined, so legitimate falsy values such as an empty
     * string are accepted. `context` and `retrievalContext` must be arrays.
     * Any other field name in `requiredFields` is not validated here.
     *
     * @param example Object whose properties are inspected.
     * @throws {Error} On the first required parameter that is missing.
     */
    _checkExampleParams(example) {
        for (const param of this.requiredFields) {
            switch (param) {
                case 'input':
                case 'actualOutput':
                case 'expectedOutput':
                    // `== null` matches exactly null and undefined.
                    if (example[param] == null) {
                        throw new Error(`Example is missing required parameter: ${param}`);
                    }
                    break;
                case 'context':
                case 'retrievalContext':
                    if (!Array.isArray(example[param])) {
                        throw new Error(`Example is missing required parameter: ${param} (must be an array)`);
                    }
                    break;
                default:
                    break;
            }
        }
    }
    /**
     * Get the name of the scorer (same value as `type`).
     * This is equivalent to Python's __name__ property.
     */
    get name() {
        return this.type;
    }
    /**
     * Calculate token costs for model usage.
     * This is a utility method that all scorers can use.
     *
     * Best-effort: if the underlying calculation throws, a warning is logged
     * and 0 is returned instead of propagating the error.
     *
     * @param model Model name (e.g., 'gpt-3.5-turbo')
     * @param promptTokens Number of prompt tokens
     * @param completionTokens Number of completion tokens
     * @returns Total cost in USD
     */
    _calculateTokenCosts(model, promptTokens, completionTokens) {
        try {
            const costResult = (0, token_costs_js_1.calculateTokenCosts)(model, promptTokens, completionTokens);
            return costResult.totalCostUsd;
        }
        catch (e) {
            (0, logger_js_1.warn)(`Error calculating token costs: ${e}`);
            return 0;
        }
    }
}
exports.JudgevalScorer = JudgevalScorer;
/**
 * Wrapper for scorers to allow dynamic loading of implementations.
 *
 * The constructor keeps a reference to the wrapped scorer and copies its
 * configuration and result fields onto the wrapper. The copies are taken
 * once, at construction time; later changes to the wrapped scorer are not
 * reflected on the wrapper's own fields.
 */
class ScorerWrapper {
    /**
     * @param scorer Scorer object to wrap. Its fields are read directly, so
     *               it must not be null/undefined.
     */
    constructor(scorer) {
        this.scorer = scorer;
        this.type = scorer.type;
        // For backward compatibility: fall back to the snake_case name.
        this.scoreType = scorer.scoreType || scorer.score_type;
        this.threshold = scorer.threshold;
        this.score = scorer.score;
        this.score_breakdown = scorer.score_breakdown;
        this.reason = scorer.reason;
        this.success = scorer.success;
        this.evaluation_model = scorer.evaluation_model;
        this.strict_mode = scorer.strict_mode;
        this.async_mode = scorer.async_mode;
        this.verbose_mode = scorer.verbose_mode;
        this.include_reason = scorer.include_reason;
        this.error = scorer.error;
        this.evaluation_cost = scorer.evaluation_cost;
        this.verbose_logs = scorer.verbose_logs;
        this.additional_metadata = scorer.additional_metadata;
        this.requiredFields = scorer.requiredFields;
    }
    /**
     * Check if the score meets the threshold.
     *
     * A recorded `error` always counts as failure, matching
     * `JudgevalScorer.successCheck()`.
     *
     * @returns {boolean} false if `error` is set or `score` is undefined,
     *          otherwise `score >= threshold`.
     */
    successCheck() {
        if (this.error !== undefined || this.score === undefined) {
            return false;
        }
        return this.score >= this.threshold;
    }
    /**
     * Load the appropriate implementation based on the use_judgment flag.
     *
     * Selection between API and local implementations is not implemented
     * yet, so the wrapped scorer is returned whatever the flag's value.
     *
     * @param useJudgment Currently unused (default true).
     * @returns The wrapped scorer.
     */
    loadImplementation(useJudgment = true) {
        return this.scorer;
    }
    /**
     * Validate that the threshold is within the allowed range: >= 0 for
     * scorer types listed in UNBOUNDED_SCORERS, otherwise within [0, 1].
     *
     * @throws {Error} If the threshold is NaN or outside the allowed range.
     */
    validateThreshold() {
        // NaN compares false against every bound, so without this guard it
        // would slip through both range checks below.
        if (Number.isNaN(this.threshold)) {
            throw new Error(`Threshold for ${this.type} must be a valid number, got: ${this.threshold}`);
        }
        // Check if this is an unbounded scorer (case-insensitive match on type).
        const isUnbounded = Array.from(constants_js_1.UNBOUNDED_SCORERS).some(scorer => scorer.toLowerCase() === this.type.toLowerCase());
        if (isUnbounded) {
            if (this.threshold < 0) {
                throw new Error(`Threshold for ${this.type} must be greater than or equal to 0, got: ${this.threshold}`);
            }
            return;
        }
        if (this.threshold < 0 || this.threshold > 1) {
            throw new Error(`Threshold for ${this.type} must be between 0 and 1, got: ${this.threshold}`);
        }
    }
    /**
     * Convert the scorer to a plain object.
     *
     * @returns {object} A new object built from the wrapper's own copied
     *          fields; `type` is emitted as `score_type`.
     */
    toJSON() {
        return {
            score_type: this.type,
            threshold: this.threshold,
            score: this.score,
            score_breakdown: this.score_breakdown,
            reason: this.reason,
            success: this.success,
            evaluation_model: this.evaluation_model,
            strict_mode: this.strict_mode,
            async_mode: this.async_mode,
            verbose_mode: this.verbose_mode,
            include_reason: this.include_reason,
            error: this.error,
            evaluation_cost: this.evaluation_cost,
            verbose_logs: this.verbose_logs,
            additional_metadata: this.additional_metadata,
            requiredFields: this.requiredFields,
        };
    }
}
exports.ScorerWrapper = ScorerWrapper;
//# sourceMappingURL=base-scorer.js.map