UNPKG

@maximai/maxim-js

Version:

Maxim AI JS SDK. Visit https://getmaxim.ai for more info.

130 lines (129 loc) 5.67 kB
import type { DataStructure } from "../models/dataset";
import type { CombinedLocalEvaluatorType, LocalEvaluatorType, PassFailCriteriaType } from "../models/evaluator";
/**
 * Creates a custom evaluator for local evaluation of test run outputs.
 *
 * Local evaluators run client-side during test runs to score each executed row (with output and retrieved context).
 * They must define both an evaluation function and pass/fail criteria to determine success.
 *
 * @template T - The data structure type for the evaluator, extending DataStructure or undefined.
 *   Defaults to `undefined` when no type argument is supplied.
 * @param name - Unique name for the evaluator. Must be unique within a test run.
 * @param evaluationFunction - Function that scores outputs; its type is the `evaluationFunction`
 *   member of `LocalEvaluatorType<T>`.
 * @param passFailCriteria - Criteria defining pass/fail thresholds; its type is the `passFailCriteria`
 *   member of `LocalEvaluatorType<T>`.
 * @returns A configured local evaluator ready for use in test runs
 * @throws {Error} When passFailCriteria is null, undefined, or invalid
 * @example
 * ```ts
 * import { createCustomEvaluator, createDataStructure } from '@maximai/maxim-js';
 *
 * const dataStructure = createDataStructure({
 *   input: "INPUT",
 *   expectedOutput: "EXPECTED_OUTPUT"
 * });
 *
 * const lengthEvaluator = createCustomEvaluator<typeof dataStructure>(
 *   "response-length",
 *   (result, data, variables) => {
 *     const wordCount = result.output.split(' ').length;
 *     return {
 *       score: wordCount,
 *       reasoning: `Response contains ${wordCount} words`
 *     };
 *   },
 *   {
 *     onEachEntry: {
 *       scoreShouldBe: ">=",
 *       value: 10
 *     },
 *     forTestrunOverall: {
 *       overallShouldBe: ">=",
 *       value: 80,
 *       for: "percentageOfPassedResults"
 *     }
 *   }
 * );
 * ```
 *
 * @example
 * ```ts
 * // Boolean evaluator example.
 * // The keyword check is computed once so that `score` and `reasoning` always agree
 * // (both are case-insensitive).
 * const containsKeywordEvaluator = createCustomEvaluator<typeof dataStructure>(
 *   "keyword-checker",
 *   (result, data, variables) => {
 *     const hasKeyword = result.output.toLowerCase().includes("important");
 *     return {
 *       score: hasKeyword,
 *       reasoning: hasKeyword ? "Contains keyword" : "Missing keyword"
 *     };
 *   },
 *   {
 *     onEachEntry: {
 *       scoreShouldBe: "=",
 *       value: true
 *     },
 *     forTestrunOverall: {
 *       overallShouldBe: ">=",
 *       value: 75,
 *       for: "percentageOfPassedResults"
 *     }
 *   }
 * );
 * ```
 */
export declare function createCustomEvaluator<T extends DataStructure | undefined = undefined>(name: string, evaluationFunction: LocalEvaluatorType<T>["evaluationFunction"], passFailCriteria: LocalEvaluatorType<T>["passFailCriteria"]): LocalEvaluatorType<T>;
/**
 * Creates a builder for combined evaluators that can output multiple evaluator scores under the same evaluation function.
 *
 * Combined evaluators allow a single evaluation function to return multiple named scores,
 * useful when one analysis can produce several metrics. Each named score must have
 * corresponding pass/fail criteria.
 *
 * @template U - Non-empty tuple of evaluator names (constrained to `readonly [string, ...string[]]`),
 *   so at least one name is required.
 * @param names - One or more evaluator names, passed as separate arguments (rest parameter).
 *   These are the names that will be returned by the evaluation function.
 * @returns Builder object with a `build` method to create the combined evaluator
 * @example
 * ```ts
 * import { createCustomCombinedEvaluatorsFor, createDataStructure } from '@maximai/maxim-js';
 *
 * const dataStructure = createDataStructure({
 *   input: "INPUT",
 *   expectedOutput: "EXPECTED_OUTPUT"
 * });
 *
 * // NOTE: `analyzeText` below stands in for your own analysis logic; it is not imported in this example.
 * const qualityEvaluator = createCustomCombinedEvaluatorsFor("accuracy", "relevance", "fluency")
 *   .build<typeof dataStructure>(
 *     (result, data, variables) => {
 *       // Single function returns multiple scores
 *       const analysis = analyzeText(result.output);
 *       return {
 *         accuracy: { score: analysis.factualScore, reasoning: "Fact-checked against sources" },
 *         relevance: { score: analysis.topicScore, reasoning: "Relevance to user query" },
 *         fluency: { score: analysis.grammarScore, reasoning: "Grammar and readability" }
 *       };
 *     },
 *     {
 *       accuracy: {
 *         onEachEntry: { scoreShouldBe: ">=", value: 0.8 },
 *         forTestrunOverall: { overallShouldBe: ">=", value: 85, for: "average" }
 *       },
 *       relevance: {
 *         onEachEntry: { scoreShouldBe: ">=", value: 0.7 },
 *         forTestrunOverall: { overallShouldBe: ">=", value: 80, for: "average" }
 *       },
 *       fluency: {
 *         onEachEntry: { scoreShouldBe: ">=", value: 0.9 },
 *         forTestrunOverall: { overallShouldBe: ">=", value: 90, for: "percentageOfPassedResults" }
 *       }
 *     }
 *   );
 *
 * // Usage in a test run
 * maxim.createTestRun("quality-test", "workspace-id")
 *   .withEvaluators(qualityEvaluator)
 *   .run();
 * ```
 */
export declare function createCustomCombinedEvaluatorsFor<U extends readonly [string, ...string[]]>(...names: U): {
    /**
     * Builds the combined evaluator with evaluation function and pass/fail criteria.
     *
     * Both arguments and the return value are typed against
     * `Record<U[number], PassFailCriteriaType>`, i.e. one `PassFailCriteriaType` entry
     * per name passed to `createCustomCombinedEvaluatorsFor`.
     *
     * @template T - The data structure type for the evaluator. Defaults to `undefined`
     *   when no type argument is supplied.
     * @param evaluationFunction - Function returning multiple named scores
     * @param passFailCriteria - Criteria for each named evaluator
     * @returns The configured combined evaluator
     * @throws {Error} When passFailCriteria is missing or contains invalid criteria
     * @throws {Error} When passFailCriteria contains evaluator names not in the names array
     */
    build: <T extends DataStructure | undefined = undefined>(evaluationFunction: CombinedLocalEvaluatorType<T, Record<U[number], PassFailCriteriaType>>["evaluationFunction"], passFailCriteria: CombinedLocalEvaluatorType<T, Record<U[number], PassFailCriteriaType>>["passFailCriteria"]) => CombinedLocalEvaluatorType<T, Record<U[number], PassFailCriteriaType>>;
};