// judgeval — Judgment SDK for TypeScript/JavaScript
// Version: (not specified)
// Compiled JavaScript output · 282 lines · 16.7 kB
"use strict";
// Compiler-emitted helper (this looks like the standard TypeScript down-level
// form of `async` functions — the file ends with a source-map reference).
// It runs a generator function to completion and returns a promise for the result.
// If `this.__awaiter` already exists it is reused instead of defining a new one.
//   thisArg    - `this` value the generator function is applied with
//   _arguments - argument list applied to the generator function ([] when falsy)
//   P          - promise constructor to use; the global Promise when falsy
//   generator  - generator function; each yielded value is waited on before resuming
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Pass P instances through untouched; wrap any other value in a P resolved with it.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with a settled value; anything it throws rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Re-raise a rejection inside the generator so its own try/catch can observe it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Finished: resolve with the return value. Otherwise wait on the yielded value and continue.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// `generator` is rebound from the generator function to the iterator it produces, then started.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.ScorerWrapper = exports.Text2SQLScorer = exports.SummarizationScorer = exports.JsonCorrectnessScorer = exports.InstructionAdherenceScorer = exports.HallucinationScorer = exports.GroundednessScorer = exports.FaithfulnessScorer = exports.ExecutionOrderScorer = exports.ContextualRelevancyScorer = exports.ContextualRecallScorer = exports.ContextualPrecisionScorer = exports.ComparisonScorer = exports.AnswerRelevancyScorer = exports.AnswerCorrectnessScorer = void 0;
const base_scorer_js_1 = require("./base-scorer.js");
/**
* Implementation of API-based scorers
*/
/**
 * Scorer registered under the 'answer_correctness' score type.
 *
 * Construction forwards every argument to APIJudgmentScorer and then runs the
 * inherited validateThreshold() check. No scoring happens locally.
 */
class AnswerCorrectnessScorer extends base_scorer_js_1.APIJudgmentScorer {
    /**
     * @param {number} [threshold=0.7] - Forwarded to the base constructor.
     * @param {*} [additional_metadata] - Forwarded to the base constructor.
     * @param {boolean} [strict_mode=false] - Forwarded to the base constructor.
     * @param {boolean} [async_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [verbose_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [include_reason=true] - Forwarded to the base constructor.
     */
    constructor(
        threshold = 0.7,
        additional_metadata,
        strict_mode = false,
        async_mode = true,
        verbose_mode = true,
        include_reason = true
    ) {
        super(
            'answer_correctness',
            threshold,
            additional_metadata,
            strict_mode,
            async_mode,
            verbose_mode,
            include_reason
        );
        this.validateThreshold();
    }

    /**
     * Local scoring entry point; intentionally unsupported.
     *
     * @param {*} example - Ignored.
     * @returns {Promise<never>} A promise that always rejects with an Error.
     */
    a_score_example(example) {
        const unsupported = new Error('API scorers are evaluated on the server side');
        return Promise.reject(unsupported);
    }
}
exports.AnswerCorrectnessScorer = AnswerCorrectnessScorer;
/**
 * Scorer registered under the 'answer_relevancy' score type.
 *
 * All constructor arguments are handed to APIJudgmentScorer as-is; the
 * inherited validateThreshold() check runs afterwards.
 */
class AnswerRelevancyScorer extends base_scorer_js_1.APIJudgmentScorer {
    /**
     * @param {number} [threshold=0.7] - Forwarded to the base constructor.
     * @param {*} [additional_metadata] - Forwarded to the base constructor.
     * @param {boolean} [strict_mode=false] - Forwarded to the base constructor.
     * @param {boolean} [async_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [verbose_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [include_reason=true] - Forwarded to the base constructor.
     */
    constructor(
        threshold = 0.7,
        additional_metadata,
        strict_mode = false,
        async_mode = true,
        verbose_mode = true,
        include_reason = true
    ) {
        super(
            'answer_relevancy',
            threshold,
            additional_metadata,
            strict_mode,
            async_mode,
            verbose_mode,
            include_reason
        );
        this.validateThreshold();
    }

    /**
     * Local scoring entry point; intentionally unsupported.
     *
     * @param {*} example - Ignored.
     * @returns {Promise<never>} A promise that always rejects with an Error.
     */
    a_score_example(example) {
        const unsupported = new Error('API scorers are evaluated on the server side');
        return Promise.reject(unsupported);
    }
}
exports.AnswerRelevancyScorer = AnswerRelevancyScorer;
/**
 * Scorer registered under the 'comparison' score type.
 *
 * In addition to the common scorer arguments it carries a list of comparison
 * criteria and a free-text description, both of which are included in the
 * serialized form produced by toJSON().
 */
class ComparisonScorer extends base_scorer_js_1.APIJudgmentScorer {
    /**
     * @param {number} [threshold=0.5] - Must be greater than or equal to 0.
     * @param {*} [criteria=['Accuracy', 'Helpfulness', 'Relevance']] - Stored on the instance as `criteria`.
     * @param {string} [description='Compare the outputs based on the given criteria'] - Stored on the instance as `description`.
     * @param {*} [additional_metadata] - Forwarded to the base constructor.
     * @param {boolean} [strict_mode=false] - Forwarded to the base constructor.
     * @param {boolean} [async_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [verbose_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [include_reason=true] - Forwarded to the base constructor.
     * @throws {Error} When `threshold` is negative or is not comparable to 0 (e.g. NaN).
     */
    constructor(
        threshold = 0.5,
        criteria = ['Accuracy', 'Helpfulness', 'Relevance'],
        description = 'Compare the outputs based on the given criteria',
        additional_metadata,
        strict_mode = false,
        async_mode = true,
        verbose_mode = true,
        include_reason = true
    ) {
        super('comparison', threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
        this.criteria = criteria;
        this.description = description;
        // Comparison is an unbounded scorer, so only the lower bound is enforced.
        // The test is a negated `>=` rather than `< 0` because every relational
        // comparison against NaN is false: `NaN < 0` would let NaN through.
        if (!(threshold >= 0)) {
            throw new Error(`Threshold for comparison must be greater than or equal to 0, got: ${threshold}`);
        }
    }

    /**
     * @returns {Object} A copy of the base serialization extended with
     *   `criteria` and `description`.
     */
    toJSON() {
        return Object.assign({}, super.toJSON(), {
            criteria: this.criteria,
            description: this.description,
        });
    }

    /**
     * Local scoring entry point; intentionally unsupported.
     *
     * @param {*} example - Ignored.
     * @returns {Promise<never>} A promise that always rejects with an Error.
     */
    a_score_example(example) {
        return Promise.reject(new Error('API scorers are evaluated on the server side'));
    }
}
exports.ComparisonScorer = ComparisonScorer;
/**
 * Scorer registered under the 'contextual_precision' score type.
 *
 * After delegating to APIJudgmentScorer and running the inherited
 * validateThreshold() check, it sets `requiredFields` to
 * input / actual_output / context.
 */
class ContextualPrecisionScorer extends base_scorer_js_1.APIJudgmentScorer {
    /**
     * @param {number} [threshold=0.7] - Forwarded to the base constructor.
     * @param {*} [additional_metadata] - Forwarded to the base constructor.
     * @param {boolean} [strict_mode=false] - Forwarded to the base constructor.
     * @param {boolean} [async_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [verbose_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [include_reason=true] - Forwarded to the base constructor.
     */
    constructor(
        threshold = 0.7,
        additional_metadata,
        strict_mode = false,
        async_mode = true,
        verbose_mode = true,
        include_reason = true
    ) {
        super(
            'contextual_precision',
            threshold,
            additional_metadata,
            strict_mode,
            async_mode,
            verbose_mode,
            include_reason
        );
        this.validateThreshold();
        // NOTE(review): ContextualRelevancyScorer in this file lists
        // 'retrieval_context' where this list has 'context' — confirm that is intended.
        const fields = ['input', 'actual_output', 'context'];
        this.requiredFields = fields;
    }

    /**
     * Local scoring entry point; intentionally unsupported.
     *
     * @param {*} example - Ignored.
     * @returns {Promise<never>} A promise that always rejects with an Error.
     */
    a_score_example(example) {
        const unsupported = new Error('API scorers are evaluated on the server side');
        return Promise.reject(unsupported);
    }
}
exports.ContextualPrecisionScorer = ContextualPrecisionScorer;
/**
 * Scorer registered under the 'contextual_recall' score type.
 *
 * After delegating to APIJudgmentScorer and running the inherited
 * validateThreshold() check, it sets `requiredFields` to
 * input / actual_output / context.
 */
class ContextualRecallScorer extends base_scorer_js_1.APIJudgmentScorer {
    /**
     * @param {number} [threshold=0.7] - Forwarded to the base constructor.
     * @param {*} [additional_metadata] - Forwarded to the base constructor.
     * @param {boolean} [strict_mode=false] - Forwarded to the base constructor.
     * @param {boolean} [async_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [verbose_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [include_reason=true] - Forwarded to the base constructor.
     */
    constructor(
        threshold = 0.7,
        additional_metadata,
        strict_mode = false,
        async_mode = true,
        verbose_mode = true,
        include_reason = true
    ) {
        super(
            'contextual_recall',
            threshold,
            additional_metadata,
            strict_mode,
            async_mode,
            verbose_mode,
            include_reason
        );
        this.validateThreshold();
        // NOTE(review): ContextualRelevancyScorer in this file lists
        // 'retrieval_context' where this list has 'context' — confirm that is intended.
        const fields = ['input', 'actual_output', 'context'];
        this.requiredFields = fields;
    }

    /**
     * Local scoring entry point; intentionally unsupported.
     *
     * @param {*} example - Ignored.
     * @returns {Promise<never>} A promise that always rejects with an Error.
     */
    a_score_example(example) {
        const unsupported = new Error('API scorers are evaluated on the server side');
        return Promise.reject(unsupported);
    }
}
exports.ContextualRecallScorer = ContextualRecallScorer;
/**
 * Scorer registered under the 'contextual_relevancy' score type.
 *
 * After delegating to APIJudgmentScorer and running the inherited
 * validateThreshold() check, it sets `requiredFields` to
 * input / actual_output / retrieval_context.
 */
class ContextualRelevancyScorer extends base_scorer_js_1.APIJudgmentScorer {
    /**
     * @param {number} [threshold=0.7] - Forwarded to the base constructor.
     * @param {*} [additional_metadata] - Forwarded to the base constructor.
     * @param {boolean} [strict_mode=false] - Forwarded to the base constructor.
     * @param {boolean} [async_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [verbose_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [include_reason=true] - Forwarded to the base constructor.
     */
    constructor(
        threshold = 0.7,
        additional_metadata,
        strict_mode = false,
        async_mode = true,
        verbose_mode = true,
        include_reason = true
    ) {
        super(
            'contextual_relevancy',
            threshold,
            additional_metadata,
            strict_mode,
            async_mode,
            verbose_mode,
            include_reason
        );
        this.validateThreshold();
        const fields = ['input', 'actual_output', 'retrieval_context'];
        this.requiredFields = fields;
    }

    /**
     * Local scoring entry point; intentionally unsupported.
     *
     * @param {*} example - Ignored.
     * @returns {Promise<never>} A promise that always rejects with an Error.
     */
    a_score_example(example) {
        const unsupported = new Error('API scorers are evaluated on the server side');
        return Promise.reject(unsupported);
    }
}
exports.ContextualRelevancyScorer = ContextualRelevancyScorer;
/**
 * Scorer registered under the 'execution_order' score type.
 *
 * Besides the common scorer arguments it records the expected tool list,
 * which toJSON() emits under the `expected_tools` key.
 */
class ExecutionOrderScorer extends base_scorer_js_1.APIJudgmentScorer {
    /**
     * @param {number} [threshold=1.0] - Forwarded to the base constructor.
     * @param {*} [expectedTools] - Stored on the instance as `expectedTools`.
     * @param {*} [additional_metadata] - Forwarded to the base constructor.
     * @param {boolean} [strict_mode=false] - Forwarded to the base constructor and
     *   also stored on the instance as `strictMode`.
     * @param {boolean} [async_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [verbose_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [include_reason=true] - Forwarded to the base constructor.
     */
    constructor(
        threshold = 1.0,
        expectedTools,
        additional_metadata,
        strict_mode = false,
        async_mode = true,
        verbose_mode = true,
        include_reason = true
    ) {
        super(
            'execution_order',
            threshold,
            additional_metadata,
            strict_mode,
            async_mode,
            verbose_mode,
            include_reason
        );
        this.strictMode = strict_mode;
        this.expectedTools = expectedTools;
        this.validateThreshold();
    }

    /**
     * @returns {Object} A copy of the base serialization with `expected_tools` added.
     */
    toJSON() {
        return Object.assign({}, super.toJSON(), { expected_tools: this.expectedTools });
    }

    /**
     * Local scoring entry point; intentionally unsupported.
     *
     * @param {*} example - Ignored.
     * @returns {Promise<never>} A promise that always rejects with an Error.
     */
    a_score_example(example) {
        const unsupported = new Error('API scorers are evaluated on the server side');
        return Promise.reject(unsupported);
    }
}
exports.ExecutionOrderScorer = ExecutionOrderScorer;
/**
 * Scorer registered under the 'faithfulness' score type.
 *
 * After delegating to APIJudgmentScorer and running the inherited
 * validateThreshold() check, it sets `requiredFields` to
 * input / actual_output / context.
 */
class FaithfulnessScorer extends base_scorer_js_1.APIJudgmentScorer {
    /**
     * @param {number} [threshold=0.7] - Forwarded to the base constructor.
     * @param {*} [additional_metadata] - Forwarded to the base constructor.
     * @param {boolean} [strict_mode=false] - Forwarded to the base constructor.
     * @param {boolean} [async_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [verbose_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [include_reason=true] - Forwarded to the base constructor.
     */
    constructor(
        threshold = 0.7,
        additional_metadata,
        strict_mode = false,
        async_mode = true,
        verbose_mode = true,
        include_reason = true
    ) {
        super(
            'faithfulness',
            threshold,
            additional_metadata,
            strict_mode,
            async_mode,
            verbose_mode,
            include_reason
        );
        this.validateThreshold();
        const fields = ['input', 'actual_output', 'context'];
        this.requiredFields = fields;
    }

    /**
     * Local scoring entry point; intentionally unsupported.
     *
     * @param {*} example - Ignored.
     * @returns {Promise<never>} A promise that always rejects with an Error.
     */
    a_score_example(example) {
        const unsupported = new Error('API scorers are evaluated on the server side');
        return Promise.reject(unsupported);
    }
}
exports.FaithfulnessScorer = FaithfulnessScorer;
/**
 * Scorer registered under the 'groundedness' score type.
 *
 * All constructor arguments are handed to APIJudgmentScorer as-is; the
 * inherited validateThreshold() check runs afterwards.
 */
class GroundednessScorer extends base_scorer_js_1.APIJudgmentScorer {
    /**
     * @param {number} [threshold=0.7] - Forwarded to the base constructor.
     * @param {*} [additional_metadata] - Forwarded to the base constructor.
     * @param {boolean} [strict_mode=false] - Forwarded to the base constructor.
     * @param {boolean} [async_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [verbose_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [include_reason=true] - Forwarded to the base constructor.
     */
    constructor(
        threshold = 0.7,
        additional_metadata,
        strict_mode = false,
        async_mode = true,
        verbose_mode = true,
        include_reason = true
    ) {
        super(
            'groundedness',
            threshold,
            additional_metadata,
            strict_mode,
            async_mode,
            verbose_mode,
            include_reason
        );
        this.validateThreshold();
    }

    /**
     * Local scoring entry point; intentionally unsupported.
     *
     * @param {*} example - Ignored.
     * @returns {Promise<never>} A promise that always rejects with an Error.
     */
    a_score_example(example) {
        const unsupported = new Error('API scorers are evaluated on the server side');
        return Promise.reject(unsupported);
    }
}
exports.GroundednessScorer = GroundednessScorer;
/**
 * Scorer registered under the 'hallucination' score type.
 *
 * After delegating to APIJudgmentScorer and running the inherited
 * validateThreshold() check, it sets `requiredFields` to
 * input / actual_output / context.
 */
class HallucinationScorer extends base_scorer_js_1.APIJudgmentScorer {
    /**
     * @param {number} [threshold=0.7] - Forwarded to the base constructor.
     * @param {*} [additional_metadata] - Forwarded to the base constructor.
     * @param {boolean} [strict_mode=false] - Forwarded to the base constructor.
     * @param {boolean} [async_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [verbose_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [include_reason=true] - Forwarded to the base constructor.
     */
    constructor(
        threshold = 0.7,
        additional_metadata,
        strict_mode = false,
        async_mode = true,
        verbose_mode = true,
        include_reason = true
    ) {
        super(
            'hallucination',
            threshold,
            additional_metadata,
            strict_mode,
            async_mode,
            verbose_mode,
            include_reason
        );
        this.validateThreshold();
        const fields = ['input', 'actual_output', 'context'];
        this.requiredFields = fields;
    }

    /**
     * Local scoring entry point; intentionally unsupported.
     *
     * @param {*} example - Ignored.
     * @returns {Promise<never>} A promise that always rejects with an Error.
     */
    a_score_example(example) {
        const unsupported = new Error('API scorers are evaluated on the server side');
        return Promise.reject(unsupported);
    }
}
exports.HallucinationScorer = HallucinationScorer;
/**
 * Scorer registered under the 'instruction_adherence' score type.
 *
 * All constructor arguments are handed to APIJudgmentScorer as-is; the
 * inherited validateThreshold() check runs afterwards.
 */
class InstructionAdherenceScorer extends base_scorer_js_1.APIJudgmentScorer {
    /**
     * @param {number} [threshold=0.7] - Forwarded to the base constructor.
     * @param {*} [additional_metadata] - Forwarded to the base constructor.
     * @param {boolean} [strict_mode=false] - Forwarded to the base constructor.
     * @param {boolean} [async_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [verbose_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [include_reason=true] - Forwarded to the base constructor.
     */
    constructor(
        threshold = 0.7,
        additional_metadata,
        strict_mode = false,
        async_mode = true,
        verbose_mode = true,
        include_reason = true
    ) {
        super(
            'instruction_adherence',
            threshold,
            additional_metadata,
            strict_mode,
            async_mode,
            verbose_mode,
            include_reason
        );
        this.validateThreshold();
    }

    /**
     * @returns {*} Exactly what the base class serialization returns; nothing is added.
     */
    toJSON() {
        const serialized = super.toJSON();
        return serialized;
    }

    /**
     * Local scoring entry point; intentionally unsupported.
     *
     * @param {*} example - Ignored.
     * @returns {Promise<never>} A promise that always rejects with an Error.
     */
    a_score_example(example) {
        const unsupported = new Error('API scorers are evaluated on the server side');
        return Promise.reject(unsupported);
    }
}
exports.InstructionAdherenceScorer = InstructionAdherenceScorer;
/**
 * Scorer registered under the 'json_correctness' score type.
 *
 * Besides the common scorer arguments it records a JSON schema, which
 * toJSON() emits under the `json_schema` key.
 */
class JsonCorrectnessScorer extends base_scorer_js_1.APIJudgmentScorer {
    /**
     * @param {number} [threshold=0.7] - Forwarded to the base constructor.
     * @param {*} [jsonSchema] - Stored on the instance as `jsonSchema`.
     * @param {*} [additional_metadata] - Forwarded to the base constructor.
     * @param {boolean} [strict_mode=false] - Forwarded to the base constructor.
     * @param {boolean} [async_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [verbose_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [include_reason=true] - Forwarded to the base constructor.
     */
    constructor(
        threshold = 0.7,
        jsonSchema,
        additional_metadata,
        strict_mode = false,
        async_mode = true,
        verbose_mode = true,
        include_reason = true
    ) {
        super(
            'json_correctness',
            threshold,
            additional_metadata,
            strict_mode,
            async_mode,
            verbose_mode,
            include_reason
        );
        this.jsonSchema = jsonSchema;
        this.validateThreshold();
    }

    /**
     * @returns {Object} A copy of the base serialization with `json_schema` added.
     */
    toJSON() {
        return Object.assign({}, super.toJSON(), { json_schema: this.jsonSchema });
    }

    /**
     * Local scoring entry point; intentionally unsupported.
     *
     * @param {*} example - Ignored.
     * @returns {Promise<never>} A promise that always rejects with an Error.
     */
    a_score_example(example) {
        const unsupported = new Error('API scorers are evaluated on the server side');
        return Promise.reject(unsupported);
    }
}
exports.JsonCorrectnessScorer = JsonCorrectnessScorer;
/**
 * Scorer registered under the 'summarization' score type.
 *
 * All constructor arguments are handed to APIJudgmentScorer as-is; the
 * inherited validateThreshold() check runs afterwards.
 */
class SummarizationScorer extends base_scorer_js_1.APIJudgmentScorer {
    /**
     * @param {number} [threshold=0.7] - Forwarded to the base constructor.
     * @param {*} [additional_metadata] - Forwarded to the base constructor.
     * @param {boolean} [strict_mode=false] - Forwarded to the base constructor.
     * @param {boolean} [async_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [verbose_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [include_reason=true] - Forwarded to the base constructor.
     */
    constructor(
        threshold = 0.7,
        additional_metadata,
        strict_mode = false,
        async_mode = true,
        verbose_mode = true,
        include_reason = true
    ) {
        super(
            'summarization',
            threshold,
            additional_metadata,
            strict_mode,
            async_mode,
            verbose_mode,
            include_reason
        );
        this.validateThreshold();
    }

    /**
     * Local scoring entry point; intentionally unsupported.
     *
     * @param {*} example - Ignored.
     * @returns {Promise<never>} A promise that always rejects with an Error.
     */
    a_score_example(example) {
        const unsupported = new Error('API scorers are evaluated on the server side');
        return Promise.reject(unsupported);
    }
}
exports.SummarizationScorer = SummarizationScorer;
/**
 * Scorer registered under the 'text2sql' score type.
 *
 * All constructor arguments are handed to APIJudgmentScorer as-is; the
 * inherited validateThreshold() check runs afterwards.
 */
class Text2SQLScorer extends base_scorer_js_1.APIJudgmentScorer {
    /**
     * @param {number} [threshold=0.7] - Forwarded to the base constructor.
     * @param {*} [additional_metadata] - Forwarded to the base constructor.
     * @param {boolean} [strict_mode=false] - Forwarded to the base constructor.
     * @param {boolean} [async_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [verbose_mode=true] - Forwarded to the base constructor.
     * @param {boolean} [include_reason=true] - Forwarded to the base constructor.
     */
    constructor(
        threshold = 0.7,
        additional_metadata,
        strict_mode = false,
        async_mode = true,
        verbose_mode = true,
        include_reason = true
    ) {
        super(
            'text2sql',
            threshold,
            additional_metadata,
            strict_mode,
            async_mode,
            verbose_mode,
            include_reason
        );
        this.validateThreshold();
    }

    /**
     * Local scoring entry point; intentionally unsupported.
     *
     * @param {*} example - Ignored.
     * @returns {Promise<never>} A promise that always rejects with an Error.
     */
    a_score_example(example) {
        const unsupported = new Error('API scorers are evaluated on the server side');
        return Promise.reject(unsupported);
    }
}
exports.Text2SQLScorer = Text2SQLScorer;
// Create a ScorerWrapper class to dynamically load the appropriate implementation
/**
 * Delegating wrapper around a scorer instance, plus a static factory that
 * builds the concrete API scorer matching a score-type string.
 */
class ScorerWrapper {
    /**
     * @param {*} scorer - Object the accessors and toJSON() delegate to.
     */
    constructor(scorer) {
        this.scorer = scorer;
    }

    /** @returns {*} The wrapped scorer's `scoreType`. */
    get scoreType() {
        return this.scorer.scoreType;
    }

    /** @returns {*} The wrapped scorer's `threshold`. */
    get threshold() {
        return this.scorer.threshold;
    }

    /** @returns {*} The wrapped scorer's `additional_metadata`. */
    get additional_metadata() {
        return this.scorer.additional_metadata;
    }

    /** @returns {*} Whatever the wrapped scorer's toJSON() returns. */
    toJSON() {
        return this.scorer.toJSON();
    }

    /**
     * Build the scorer class that corresponds to `type` (matched case-insensitively).
     *
     * Some scorers take extra constructor arguments. Those are read from
     * `additional_metadata` and removed from the copy of the metadata handed to
     * the scorer; the caller's object is never modified:
     *   - 'comparison':       `criteria`, `description` (defaults apply when falsy)
     *   - 'execution_order':  `expected_tools`, `strict_mode`
     *   - 'json_correctness': `json_schema`
     *
     * @param {string} type - Score type name, e.g. 'faithfulness'.
     * @param {number} [threshold] - Forwarded; each scorer supplies its own default.
     * @param {Object} [additional_metadata] - Forwarded, minus any extracted keys.
     * @param {boolean} [strict_mode=false] - Forwarded. For 'execution_order' a
     *   `strict_mode` entry in the metadata takes precedence when present.
     * @param {boolean} [async_mode=true] - Forwarded.
     * @param {boolean} [verbose_mode=true] - Forwarded.
     * @param {boolean} [include_reason=true] - Forwarded.
     * @returns {*} A new scorer instance.
     * @throws {TypeError} When `type` is missing or has no toLowerCase method.
     * @throws {Error} When `type` names no known scorer.
     */
    static fromType(type, threshold, additional_metadata, strict_mode = false, async_mode = true, verbose_mode = true, include_reason = true) {
        // Fail with a readable message instead of an opaque property-access error.
        if (type == null || typeof type.toLowerCase !== 'function') {
            throw new TypeError(`Scorer type must be a string, got: ${type === null ? 'null' : typeof type}`);
        }
        // Lets the metadata lookups below tolerate a missing metadata argument.
        const meta = additional_metadata == null ? {} : additional_metadata;
        switch (type.toLowerCase()) {
            case 'answer_correctness':
                return new AnswerCorrectnessScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
            case 'answer_relevancy':
                return new AnswerRelevancyScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
            case 'comparison': {
                // Braces give each case its own scope for its const declarations.
                const criteria = meta.criteria || ['Accuracy', 'Helpfulness', 'Relevance'];
                const description = meta.description || 'Compare the outputs based on the given criteria';
                const comparisonMetadata = Object.assign({}, additional_metadata);
                delete comparisonMetadata.criteria;
                delete comparisonMetadata.description;
                return new ComparisonScorer(threshold, criteria, description, comparisonMetadata, strict_mode, async_mode, verbose_mode, include_reason);
            }
            case 'contextual_precision':
                return new ContextualPrecisionScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
            case 'contextual_recall':
                return new ContextualRecallScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
            case 'contextual_relevancy':
                return new ContextualRelevancyScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
            case 'execution_order': {
                // A strict_mode entry in the metadata wins when present; otherwise
                // honour the strict_mode argument rather than silently dropping it.
                const strictMode = meta.strict_mode == null ? strict_mode : (meta.strict_mode || false);
                const expectedTools = meta.expected_tools;
                const executionOrderMetadata = Object.assign({}, additional_metadata);
                delete executionOrderMetadata.strict_mode;
                delete executionOrderMetadata.expected_tools;
                return new ExecutionOrderScorer(threshold, expectedTools, executionOrderMetadata, strictMode, async_mode, verbose_mode, include_reason);
            }
            case 'faithfulness':
                return new FaithfulnessScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
            case 'groundedness':
                return new GroundednessScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
            case 'hallucination':
                return new HallucinationScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
            case 'instruction_adherence':
                return new InstructionAdherenceScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
            case 'json_correctness': {
                const jsonSchema = meta.json_schema;
                const jsonMetadata = Object.assign({}, additional_metadata);
                delete jsonMetadata.json_schema;
                return new JsonCorrectnessScorer(threshold, jsonSchema, jsonMetadata, strict_mode, async_mode, verbose_mode, include_reason);
            }
            case 'summarization':
                return new SummarizationScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
            case 'text2sql':
                return new Text2SQLScorer(threshold, additional_metadata, strict_mode, async_mode, verbose_mode, include_reason);
            default:
                throw new Error(`Unknown scorer type: ${type}`);
        }
    }
}
exports.ScorerWrapper = ScorerWrapper;
//# sourceMappingURL=api-scorer.js.map