UNPKG

judgeval

Version:

Judgment SDK for TypeScript/JavaScript

282 lines 16.7 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.ScorerWrapper = exports.Text2SQLScorer = exports.SummarizationScorer = exports.JsonCorrectnessScorer = exports.InstructionAdherenceScorer = exports.HallucinationScorer = exports.GroundednessScorer = exports.FaithfulnessScorer = exports.ExecutionOrderScorer = exports.ContextualRelevancyScorer = exports.ContextualRecallScorer = exports.ContextualPrecisionScorer = exports.ComparisonScorer = exports.AnswerRelevancyScorer = exports.AnswerCorrectnessScorer = void 0;
const base_scorer_js_1 = require("./base-scorer.js");

/**
 * Implementation of API-based scorers.
 *
 * Every class below extends APIJudgmentScorer (from ./base-scorer.js, not
 * shown in this file), forwards a fixed score-type string plus the common
 * options to the base constructor, and rejects any attempt to score locally.
 */

/** Message carried by the Error that every a_score_example() rejects with. */
const SERVER_SIDE_ONLY_MESSAGE = 'API scorers are evaluated on the server side';

/** Default description used by ComparisonScorer and by ScorerWrapper.fromType. */
const DEFAULT_COMPARISON_DESCRIPTION = 'Compare the outputs based on the given criteria';

/**
 * Returns a fresh copy of the default comparison criteria so that no two
 * scorers ever share (and accidentally mutate) the same array.
 * @returns {string[]}
 */
function defaultComparisonCriteria() {
    return ['Accuracy', 'Helpfulness', 'Relevance'];
}

/**
 * Shared body of every a_score_example() override.
 * @returns {Promise<never>} A promise that is already rejected with an Error.
 */
function rejectServerSideOnly() {
    return Promise.reject(new Error(SERVER_SIDE_ONLY_MESSAGE));
}

/**
 * Reads one key from an optional metadata object.
 * @param {Object|null|undefined} metadata - Metadata object, or nothing.
 * @param {string} key - Property to read.
 * @returns {*} The property value, or undefined when metadata is null/undefined.
 */
function readMetadata(metadata, key) {
    return metadata === null || metadata === undefined ? undefined : metadata[key];
}

/**
 * Shallow-copies metadata and removes the listed keys from the copy.
 * The caller's object is never modified.
 * @param {Object|null|undefined} metadata - Metadata object, or nothing.
 * @param {string[]} keys - Keys to strip from the copy.
 * @returns {Object} A new object (empty when metadata is null/undefined).
 */
function metadataWithout(metadata, keys) {
    const copy = Object.assign({}, metadata);
    keys.forEach((key) => {
        delete copy[key];
    });
    return copy;
}

/** Scorer registered under the 'answer_correctness' score type. */
class AnswerCorrectnessScorer extends base_scorer_js_1.APIJudgmentScorer {
    constructor(threshold = 0.7, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
        super('answer_correctness', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
        this.validateThreshold();
    }

    /**
     * Local scoring is not supported.
     * @param {*} example - Unused.
     * @returns {Promise<never>} Always rejects.
     */
    a_score_example(example) {
        return rejectServerSideOnly();
    }
}
exports.AnswerCorrectnessScorer = AnswerCorrectnessScorer;

/** Scorer registered under the 'answer_relevancy' score type. */
class AnswerRelevancyScorer extends base_scorer_js_1.APIJudgmentScorer {
    constructor(threshold = 0.7, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
        super('answer_relevancy', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
        this.validateThreshold();
    }

    /**
     * Local scoring is not supported.
     * @param {*} example - Unused.
     * @returns {Promise<never>} Always rejects.
     */
    a_score_example(example) {
        return rejectServerSideOnly();
    }
}
exports.AnswerRelevancyScorer = AnswerRelevancyScorer;

/**
 * Scorer registered under the 'comparison' score type.
 * Carries extra `criteria` and `description` fields that are added to its
 * JSON form.
 */
class ComparisonScorer extends base_scorer_js_1.APIJudgmentScorer {
    /**
     * @throws {Error} When threshold is negative.
     */
    constructor(threshold = 0.5, criteria = defaultComparisonCriteria(), description = DEFAULT_COMPARISON_DESCRIPTION, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
        super('comparison', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
        this.criteria = criteria;
        this.description = description;
        // Comparison is an unbounded scorer, only validate that threshold >= 0
        if (threshold < 0) {
            throw new Error(`Threshold for comparison must be greater than or equal to 0, got: ${threshold}`);
        }
    }

    /**
     * @returns {Object} The base JSON form extended with criteria and description.
     */
    toJSON() {
        return Object.assign({}, super.toJSON(), {
            criteria: this.criteria,
            description: this.description,
        });
    }

    /**
     * Local scoring is not supported.
     * @param {*} example - Unused.
     * @returns {Promise<never>} Always rejects.
     */
    a_score_example(example) {
        return rejectServerSideOnly();
    }
}
exports.ComparisonScorer = ComparisonScorer;

/** Scorer registered under the 'contextual_precision' score type. */
class ContextualPrecisionScorer extends base_scorer_js_1.APIJudgmentScorer {
    constructor(threshold = 0.7, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
        super('contextual_precision', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
        this.validateThreshold();
        this.requiredFields = ['input', 'actual_output', 'context'];
    }

    /**
     * Local scoring is not supported.
     * @param {*} example - Unused.
     * @returns {Promise<never>} Always rejects.
     */
    a_score_example(example) {
        return rejectServerSideOnly();
    }
}
exports.ContextualPrecisionScorer = ContextualPrecisionScorer;

/** Scorer registered under the 'contextual_recall' score type. */
class ContextualRecallScorer extends base_scorer_js_1.APIJudgmentScorer {
    constructor(threshold = 0.7, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
        super('contextual_recall', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
        this.validateThreshold();
        this.requiredFields = ['input', 'actual_output', 'context'];
    }

    /**
     * Local scoring is not supported.
     * @param {*} example - Unused.
     * @returns {Promise<never>} Always rejects.
     */
    a_score_example(example) {
        return rejectServerSideOnly();
    }
}
exports.ContextualRecallScorer = ContextualRecallScorer;

/** Scorer registered under the 'contextual_relevancy' score type. */
class ContextualRelevancyScorer extends base_scorer_js_1.APIJudgmentScorer {
    constructor(threshold = 0.7, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
        super('contextual_relevancy', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
        this.validateThreshold();
        this.requiredFields = ['input', 'actual_output', 'retrieval_context'];
    }

    /**
     * Local scoring is not supported.
     * @param {*} example - Unused.
     * @returns {Promise<never>} Always rejects.
     */
    a_score_example(example) {
        return rejectServerSideOnly();
    }
}
exports.ContextualRelevancyScorer = ContextualRelevancyScorer;

/**
 * Scorer registered under the 'execution_order' score type.
 * Carries an `expectedTools` value that is serialised as `expected_tools`.
 */
class ExecutionOrderScorer extends base_scorer_js_1.APIJudgmentScorer {
    constructor(threshold = 1.0, expectedTools, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
        super('execution_order', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
        this.strictMode = strict_mode;
        this.expectedTools = expectedTools;
        this.validateThreshold();
    }

    /**
     * @returns {Object} The base JSON form extended with expected_tools.
     */
    toJSON() {
        return Object.assign({}, super.toJSON(), {
            expected_tools: this.expectedTools,
        });
    }

    /**
     * Local scoring is not supported.
     * @param {*} example - Unused.
     * @returns {Promise<never>} Always rejects.
     */
    a_score_example(example) {
        return rejectServerSideOnly();
    }
}
exports.ExecutionOrderScorer = ExecutionOrderScorer;

/** Scorer registered under the 'faithfulness' score type. */
class FaithfulnessScorer extends base_scorer_js_1.APIJudgmentScorer {
    constructor(threshold = 0.7, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
        super('faithfulness', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
        this.validateThreshold();
        this.requiredFields = ['input', 'actual_output', 'context'];
    }

    /**
     * Local scoring is not supported.
     * @param {*} example - Unused.
     * @returns {Promise<never>} Always rejects.
     */
    a_score_example(example) {
        return rejectServerSideOnly();
    }
}
exports.FaithfulnessScorer = FaithfulnessScorer;

/** Scorer registered under the 'groundedness' score type. */
class GroundednessScorer extends base_scorer_js_1.APIJudgmentScorer {
    constructor(threshold = 0.7, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
        super('groundedness', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
        this.validateThreshold();
    }

    /**
     * Local scoring is not supported.
     * @param {*} example - Unused.
     * @returns {Promise<never>} Always rejects.
     */
    a_score_example(example) {
        return rejectServerSideOnly();
    }
}
exports.GroundednessScorer = GroundednessScorer;

/** Scorer registered under the 'hallucination' score type. */
class HallucinationScorer extends base_scorer_js_1.APIJudgmentScorer {
    constructor(threshold = 0.7, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
        super('hallucination', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
        this.validateThreshold();
        this.requiredFields = ['input', 'actual_output', 'context'];
    }

    /**
     * Local scoring is not supported.
     * @param {*} example - Unused.
     * @returns {Promise<never>} Always rejects.
     */
    a_score_example(example) {
        return rejectServerSideOnly();
    }
}
exports.HallucinationScorer = HallucinationScorer;

/** Scorer registered under the 'instruction_adherence' score type. */
class InstructionAdherenceScorer extends base_scorer_js_1.APIJudgmentScorer {
    constructor(threshold = 0.7, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
        super('instruction_adherence', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
        this.validateThreshold();
    }

    /**
     * Local scoring is not supported.
     * @param {*} example - Unused.
     * @returns {Promise<never>} Always rejects.
     */
    a_score_example(example) {
        return rejectServerSideOnly();
    }
}
exports.InstructionAdherenceScorer = InstructionAdherenceScorer;

/**
 * Scorer registered under the 'json_correctness' score type.
 * Carries a `jsonSchema` value that is serialised as `json_schema`.
 */
class JsonCorrectnessScorer extends base_scorer_js_1.APIJudgmentScorer {
    constructor(threshold = 0.7, jsonSchema, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
        super('json_correctness', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
        this.jsonSchema = jsonSchema;
        this.validateThreshold();
    }

    /**
     * @returns {Object} The base JSON form extended with json_schema.
     */
    toJSON() {
        return Object.assign({}, super.toJSON(), {
            json_schema: this.jsonSchema,
        });
    }

    /**
     * Local scoring is not supported.
     * @param {*} example - Unused.
     * @returns {Promise<never>} Always rejects.
     */
    a_score_example(example) {
        return rejectServerSideOnly();
    }
}
exports.JsonCorrectnessScorer = JsonCorrectnessScorer;

/** Scorer registered under the 'summarization' score type. */
class SummarizationScorer extends base_scorer_js_1.APIJudgmentScorer {
    constructor(threshold = 0.7, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
        super('summarization', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
        this.validateThreshold();
    }

    /**
     * Local scoring is not supported.
     * @param {*} example - Unused.
     * @returns {Promise<never>} Always rejects.
     */
    a_score_example(example) {
        return rejectServerSideOnly();
    }
}
exports.SummarizationScorer = SummarizationScorer;

/** Scorer registered under the 'text2sql' score type. */
class Text2SQLScorer extends base_scorer_js_1.APIJudgmentScorer {
    constructor(threshold = 0.7, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
        super('text2sql', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
        this.validateThreshold();
    }

    /**
     * Local scoring is not supported.
     * @param {*} example - Unused.
     * @returns {Promise<never>} Always rejects.
     */
    a_score_example(example) {
        return rejectServerSideOnly();
    }
}
exports.Text2SQLScorer = Text2SQLScorer;

/**
 * Thin delegate around a scorer instance, plus a static factory that maps a
 * score-type string to the matching scorer class.
 */
class ScorerWrapper {
    /**
     * @param {Object} scorer - Scorer whose scoreType, threshold,
     *   additional_metadata and toJSON() are exposed through this wrapper.
     */
    constructor(scorer) {
        this.scorer = scorer;
    }

    get scoreType() {
        return this.scorer.scoreType;
    }

    get threshold() {
        return this.scorer.threshold;
    }

    get additional_metadata() {
        return this.scorer.additional_metadata;
    }

    toJSON() {
        return this.scorer.toJSON();
    }

    /**
     * Creates the scorer that corresponds to `type` (matched case-insensitively).
     *
     * Note that the return value is the scorer instance itself, not a
     * ScorerWrapper.
     *
     * For 'comparison', 'execution_order' and 'json_correctness' the
     * scorer-specific settings are read from `additional_metadata` and removed
     * from the copy of the metadata that is handed to the scorer; the caller's
     * object is left untouched.
     *
     * @param {string} type - Score type, e.g. 'faithfulness'.
     * @param {number} [threshold] - Passed through; undefined selects the class default.
     * @param {Object} [additional_metadata] - Extra metadata for the scorer.
     * @param {boolean} [strict_mode=false]
     * @param {boolean} [async_mode=true]
     * @param {boolean} [verbose_mode=true]
     * @param {boolean} [include_reason=true]
     * @returns {Object} A new scorer instance.
     * @throws {Error} When `type` does not name a known scorer.
     */
    static fromType(type, threshold, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
        // Each case that declares locals gets its own block so the declarations
        // do not leak into one shared switch-wide lexical scope.
        switch (type.toLowerCase()) {
            case 'answer_correctness':
                return new AnswerCorrectnessScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
            case 'answer_relevancy':
                return new AnswerRelevancyScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
            case 'comparison': {
                // For comparison, extract criteria and description from metadata if available
                const criteria = readMetadata(additional_metadata, 'criteria') || defaultComparisonCriteria();
                const description = readMetadata(additional_metadata, 'description') || DEFAULT_COMPARISON_DESCRIPTION;
                const comparisonMetadata = metadataWithout(additional_metadata, ['criteria', 'description']);
                return new ComparisonScorer(threshold, criteria, description, comparisonMetadata, strict_mode, async_mode, verbose_mode, include_reason);
            }
            case 'contextual_precision':
                return new ContextualPrecisionScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
            case 'contextual_recall':
                return new ContextualRecallScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
            case 'contextual_relevancy':
                return new ContextualRelevancyScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
            case 'execution_order': {
                // For execution order, extract strict_mode and expected_tools from metadata if available.
                // When metadata carries no strict_mode, honour the explicit strict_mode
                // argument instead of silently forcing it to false.
                const metadataStrictMode = readMetadata(additional_metadata, 'strict_mode');
                const strictMode = metadataStrictMode === undefined || metadataStrictMode === null
                    ? strict_mode
                    : (metadataStrictMode || false);
                const expectedTools = readMetadata(additional_metadata, 'expected_tools');
                const executionOrderMetadata = metadataWithout(additional_metadata, ['strict_mode', 'expected_tools']);
                return new ExecutionOrderScorer(threshold, expectedTools, executionOrderMetadata, strictMode, async_mode, verbose_mode, include_reason);
            }
            case 'faithfulness':
                return new FaithfulnessScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
            case 'groundedness':
                return new GroundednessScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
            case 'hallucination':
                return new HallucinationScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
            case 'instruction_adherence':
                return new InstructionAdherenceScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
            case 'json_correctness': {
                // For JSON correctness, extract json_schema from metadata if available
                const jsonSchema = readMetadata(additional_metadata, 'json_schema');
                const jsonMetadata = metadataWithout(additional_metadata, ['json_schema']);
                return new JsonCorrectnessScorer(threshold, jsonSchema, jsonMetadata, strict_mode, async_mode, verbose_mode, include_reason);
            }
            case 'summarization':
                return new SummarizationScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
            case 'text2sql':
                return new Text2SQLScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
            default:
                throw new Error(`Unknown scorer type: ${type}`);
        }
    }
}
exports.ScorerWrapper = ScorerWrapper;
//# sourceMappingURL=api-scorer.js.map