judgeval
Version:
Judgment SDK for TypeScript/JavaScript
132 lines • 5.54 kB
JavaScript
import { Example } from './data/example.js';
import { APIJudgmentScorer, JudgevalScorer } from './scorers/base-scorer.js';
import { ACCEPTABLE_MODELS } from './constants.js';
/**
 * Configuration for one evaluation run.
 *
 * The constructor copies the supplied options onto the instance and
 * immediately calls {@link EvaluationRun#validate}, so an instance that was
 * constructed successfully has passed every check in `validate()`.
 * {@link EvaluationRun#toJSON} converts the run into a plain object with
 * snake_case keys.
 */
export class EvaluationRun {
    /**
     * @param {object} [options] - Run configuration. Defaults to an empty
     *   object so that a missing argument is reported by `validate()` rather
     *   than failing with a property-access TypeError.
     * @param {boolean} [options.logResults] - Falsy values are stored as `false`.
     * @param {*} [options.organizationId] - Stored as-is; not validated here.
     * @param {string} [options.projectName] - Required when `logResults` is true.
     * @param {string} [options.evalName] - Required when `logResults` is true.
     * @param {Example[]} options.examples - Non-empty list of `Example` instances.
     * @param {Array<APIJudgmentScorer|JudgevalScorer>} options.scorers - Non-empty list of scorers.
     * @param {string|string[]|object} options.model - A model name, a non-empty
     *   list of model names, or a judge object.
     * @param {string} [options.aggregator] - Model name; required when `model` is a list.
     * @param {*} [options.metadata] - Stored as-is; not validated here.
     * @param {string} [options.judgmentApiKey] - Falsy values are stored as `''`.
     * @param {boolean} [options.override] - Falsy values are stored as `false`.
     * @param {Array} [options.rules] - Stored as-is; `toJSON()` calls `.map` on
     *   it, so it is presumably an array (not validated here).
     * @throws {Error} When `validate()` rejects the configuration.
     */
    constructor(options = {}) {
        this.logResults = options.logResults || false;
        this.organizationId = options.organizationId;
        this.projectName = options.projectName;
        this.evalName = options.evalName;
        this.examples = options.examples;
        this.scorers = options.scorers;
        this.model = options.model;
        this.aggregator = options.aggregator;
        this.metadata = options.metadata;
        this.judgmentApiKey = options.judgmentApiKey || '';
        this.override = options.override || false;
        this.rules = options.rules;
        this.validate();
    }

    /**
     * Validate the evaluation run configuration.
     *
     * Checks run in a fixed order (logResults, projectName, evalName,
     * examples, scorers, model, aggregator) and the first failure is thrown.
     *
     * @throws {Error} Describing the first invalid field encountered.
     */
    validate() {
        // logResults
        if (typeof this.logResults !== 'boolean') {
            throw new Error(`logResults must be a boolean. Received ${this.logResults} of type ${typeof this.logResults}`);
        }

        // projectName / evalName are only mandatory when results are logged.
        if (this.logResults && !this.projectName) {
            throw new Error('Project name is required when logResults is true. Please include the projectName argument.');
        }
        if (this.logResults && !this.evalName) {
            throw new Error('Eval name is required when logResults is true. Please include the evalName argument.');
        }

        // examples
        if (!this.examples || this.examples.length === 0) {
            throw new Error('Examples cannot be empty.');
        }
        for (const example of this.examples) {
            if (!(example instanceof Example)) {
                throw new Error(`Invalid type for Example: ${typeof example}`);
            }
        }

        // scorers
        if (!this.scorers || this.scorers.length === 0) {
            throw new Error('Scorers cannot be empty.');
        }
        for (const scorer of this.scorers) {
            if (!(scorer instanceof APIJudgmentScorer) && !(scorer instanceof JudgevalScorer)) {
                throw new Error(`Invalid type for Scorer: ${typeof scorer}`);
            }
        }

        // model
        if (!this.model) {
            throw new Error('Model cannot be empty.');
        }
        if (Array.isArray(this.model)) {
            // An empty array is truthy, so the check above does not catch it,
            // and every() below would be vacuously true. Reject it explicitly.
            if (this.model.length === 0) {
                throw new Error('Model cannot be empty.');
            }
            if (!this.model.every(m => typeof m === 'string')) {
                throw new Error('When providing a list of models, all elements must be strings');
            }
            for (const m of this.model) {
                if (!ACCEPTABLE_MODELS.has(m)) {
                    throw new Error(`Model name ${m} not recognized. Please select a valid model name.`);
                }
            }
        }
        else if (typeof this.model === 'string') {
            if (!ACCEPTABLE_MODELS.has(this.model)) {
                throw new Error(`Model name ${this.model} not recognized. Please select a valid model name.`);
            }
        }
        else if (typeof this.model === 'object') {
            // Any non-array object is treated as a judge instance (null was
            // already rejected by the falsy check). Such a model may only be
            // combined with JudgevalScorer scorers.
            if (!this.scorers.every(s => s instanceof JudgevalScorer)) {
                throw new Error('When using a JudgevalJudge model, all scorers must be JudgevalScorer type');
            }
        }
        else {
            throw new Error(`Model must be one of: string, list of strings, or JudgevalJudge instance. Received type ${typeof this.model}.`);
        }

        // aggregator: mandatory for a model list, and must be a known model
        // name whenever it is provided.
        if (Array.isArray(this.model) && !this.aggregator) {
            throw new Error('Aggregator cannot be empty when using multiple models.');
        }
        if (this.aggregator && !ACCEPTABLE_MODELS.has(this.aggregator)) {
            throw new Error(`Model name ${this.aggregator} not recognized.`);
        }
    }

    /**
     * Convert the evaluation run to a plain object with snake_case keys.
     *
     * Examples are serialized with their own `toJSON()` when they have one.
     * Each scorer is serialized with `toJSON()` if present, otherwise
     * `toDict()` if present, otherwise as `{ score_type, threshold }`.
     * `rules` is only included when it is set. Note that the result contains
     * `judgment_api_key`.
     *
     * @returns {object} Plain-object representation of this run.
     */
    toJSON() {
        const data = {
            log_results: this.logResults,
            organization_id: this.organizationId,
            project_name: this.projectName,
            eval_name: this.evalName,
            examples: this.examples.map(example => example.toJSON ? example.toJSON() : example),
            scorers: this.scorers.map(scorer => {
                if ('toJSON' in scorer) {
                    return scorer.toJSON();
                }
                if ('toDict' in scorer) {
                    return scorer.toDict();
                }
                return {
                    score_type: scorer.scoreType,
                    threshold: scorer.threshold
                };
            }),
            model: this.model,
            aggregator: this.aggregator,
            metadata: this.metadata,
            judgment_api_key: this.judgmentApiKey,
            override: this.override
        };
        if (this.rules) {
            // Let each rule serialize itself when it knows how to.
            data.rules = this.rules.map(rule => rule.toJSON ? rule.toJSON() : rule);
        }
        return data;
    }
}
//# sourceMappingURL=evaluation-run.js.map