UNPKG

@maximai/maxim-js

Version:

Maxim AI JS SDK. Visit https://getmaxim.ai for more info.

203 lines • 8.89 kB

JavaScript

"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.createCustomEvaluator = createCustomEvaluator; exports.createCustomCombinedEvaluatorsFor = createCustomCombinedEvaluatorsFor; /** * Creates a custom evaluator for local evaluation of test run outputs. * * Local evaluators run client-side during test runs to score each executed row (with output and retrieved context). * They must define both an evaluation function and pass/fail criteria to determine success. * * @template T - The data structure type for the evaluator, extending DataStructure or undefined * @param name - Unique name for the evaluator. Must be unique within a test run. * @param evaluationFunction - Function that scores outputs * @param passFailCriteria - Criteria defining pass/fail thresholds * @returns A configured local evaluator ready for use in test runs * @throws {Error} When passFailCriteria is null, undefined, or invalid * @example * import { createCustomEvaluator, createDataStructure } from '@maximai/maxim-js'; * * const dataStructure = createDataStructure({ * input: "INPUT", * expectedOutput: "EXPECTED_OUTPUT" * }); * * const lengthEvaluator = createCustomEvaluator<typeof dataStructure>( * "response-length", * (result, data, variables) => { * const wordCount = result.output.split(' ').length; * return { * score: wordCount, * reasoning: `Response contains ${wordCount} words` * }; * }, * { * onEachEntry: { * scoreShouldBe: ">=", * value: 10 * }, * forTestrunOverall: { * overallShouldBe: ">=", * value: 80, * for: "percentageOfPassedResults" * } * } * ); * * @example * // Boolean evaluator example * const containsKeywordEvaluator = createCustomEvaluator<typeof dataStructure>( * "keyword-checker", * (result, data, variables) => ({ * score: result.output.toLowerCase().includes("important"), * reasoning: result.output.includes("important") ? "Contains keyword" : "Missing keyword" * }), * { * onEachEntry: { * scoreShouldBe: "=", * value: true * }, * forTestrunOverall: { * overallShouldBe: ">=", * value: 75, * for: "percentageOfPassedResults" * } * } * ); */ function createCustomEvaluator(name, evaluationFunction, passFailCriteria) { if (!passFailCriteria) { throw new Error(`Error while creating evaluator ${name}: passFailCriteria is required`); } sanitizePassFailCriteria(name, passFailCriteria); return { name, evaluationFunction, passFailCriteria, }; } /** * Creates a builder for combined evaluators that can output multiple evaluator scores under the same evaluation function. * * Combined evaluators allow a single evaluation function to return multiple named scores, * useful when one analysis can produce several metrics. Each named score must have * corresponding pass/fail criteria. * * @template U - String literal array type containing evaluator names * @param names - Array of evaluator names that will be returned by the evaluation function * @returns Builder object with a `build` method to create the combined evaluator * @example * import { createCustomCombinedEvaluatorsFor, createDataStructure } from '@maximai/maxim-js'; * * const dataStructure = createDataStructure({ * input: "INPUT", * expectedOutput: "EXPECTED_OUTPUT" * }); * * const qualityEvaluator = createCustomCombinedEvaluatorsFor("accuracy", "relevance", "fluency") * .build<typeof dataStructure>( * (result, data, variables) => { * // Single function returns multiple scores * const analysis = analyzeText(result.output); * return { * accuracy: { score: analysis.factualScore, reasoning: "Fact-checked against sources" }, * relevance: { score: analysis.topicScore, reasoning: "Relevance to user query" }, * fluency: { score: analysis.grammarScore, reasoning: "Grammar and readability" } * }; * }, * { * accuracy: { * onEachEntry: { scoreShouldBe: ">=", value: 0.8 }, * forTestrunOverall: { overallShouldBe: ">=", value: 85, for: "average" } * }, * relevance: { * onEachEntry: { scoreShouldBe: ">=", value: 0.7 }, * forTestrunOverall: { overallShouldBe: ">=", value: 80, for: "average" } * }, * fluency: { * onEachEntry: { scoreShouldBe: ">=", value: 0.9 }, * forTestrunOverall: { overallShouldBe: ">=", value: 90, for: "percentageOfPassedResults" } * } * } * ); * * // Usage in a test run * maxim.createTestRun("quality-test", "workspace-id") * .withEvaluators(qualityEvaluator) * .run(); */ function createCustomCombinedEvaluatorsFor(...names) { return { /** * Builds the combined evaluator with evaluation function and pass/fail criteria. * * @template T - The data structure type for the evaluator * @param evaluationFunction - Function returning multiple named scores * @param passFailCriteria - Criteria for each named evaluator * @returns The configured combined evaluator * @throws {Error} When passFailCriteria is missing or contains invalid criteria * @throws {Error} When passFailCriteria contains evaluator names not in the names array */ build: function (evaluationFunction, passFailCriteria) { if (!passFailCriteria) { throw new Error(`Error while creating combined evaluator with evaluators ${names.join(", ")}: passFailCriteria is required`); } const missingPassFailCriteriaNames = Object.keys(passFailCriteria).filter((evaluatorName) => { if (!names.includes(evaluatorName)) { return true; } return false; }); if (missingPassFailCriteriaNames.length > 0) { throw new Error(`Error while creating combined evaluator with evaluators ${names.join(", ")}: criteria has evaluator names ${missingPassFailCriteriaNames.join(", ")} which are not in the names array`); } const invalidPassFailCriteriaErrors = []; Object.entries(passFailCriteria).forEach(([evaluatorName, criteria]) => { try { sanitizePassFailCriteria(evaluatorName, criteria); } catch (err) { invalidPassFailCriteriaErrors.push(err instanceof Error ? err.message : JSON.stringify(err)); } }); if (invalidPassFailCriteriaErrors.length > 0) { throw new Error(`Error while creating combined Evaluator with names ${names} due to invalid pass fail criteria: ${invalidPassFailCriteriaErrors.join(", ")}`); } return { names, evaluationFunction, passFailCriteria, }; }, }; } function sanitizePassFailCriteria(name, passFailCriteria) { const allOperators = [">=", "<=", "<", ">", "=", "!="]; const booleanOperators = ["=", "!="]; switch (typeof passFailCriteria.onEachEntry.value) { case "number": if (!allOperators.includes(passFailCriteria.onEachEntry.scoreShouldBe)) { throw new Error(`Error While Creating Evaluator ${name}: Invalid operator for scoreShouldBe, only accepts ` + allOperators.join(", ")); } break; case "boolean": if (!booleanOperators.includes(passFailCriteria.onEachEntry.scoreShouldBe)) { throw new Error(`Error While Creating Evaluator ${name}: Invalid operator for scoreShouldBe, only accepts ` + booleanOperators.join(", ")); } break; default: throw new Error(`Error While Creating Evaluator ${name}: Invalid type for onEachEntry.value, only accepts number or boolean`); } if (typeof passFailCriteria.forTestrunOverall.value === "number") { if (!allOperators.includes(passFailCriteria.forTestrunOverall.overallShouldBe)) { throw new Error(`Error While Creating Evaluator ${name}: Invalid operator for overallShouldBe, only accepts ` + allOperators.join(", ")); } if (passFailCriteria.forTestrunOverall.for !== "average" && passFailCriteria.forTestrunOverall.for !== "percentageOfPassedResults") { throw new Error(`Error While Creating Evaluator ${name}: Invalid value for \`for\` in forTestrunOverall, only accepts "average" or "percentageOfPassedResults"`); } } else { throw new Error(`Error While Creating Evaluator ${name}: Invalid type for forTestrunOverall.value, only accepts number`); } } //# sourceMappingURL=evaluators.js.map