@maximai/maxim-js
Version:
Maxim AI JS SDK. Visit https://getmaxim.ai for more info.
130 lines (129 loc) • 5.67 kB
TypeScript
import type { DataStructure } from "../models/dataset";
import type { CombinedLocalEvaluatorType, LocalEvaluatorType, PassFailCriteriaType } from "../models/evaluator";
/**
* Creates a custom evaluator for local evaluation of test run outputs.
*
* Local evaluators run client-side during test runs to score each executed row (with output and retrieved context).
* They must define both an evaluation function and pass/fail criteria to determine success.
*
* @template T - The data structure type for the evaluator, extending DataStructure or undefined (defaults to undefined)
* @param name - Unique name for the evaluator. Must be unique within a test run.
* @param evaluationFunction - Function that scores outputs
* @param passFailCriteria - Criteria defining pass/fail thresholds
* @returns A configured local evaluator ready for use in test runs
* @throws {Error} When passFailCriteria is null, undefined, or invalid
* @example
* import { createCustomEvaluator, createDataStructure } from '@maximai/maxim-js';
*
* const dataStructure = createDataStructure({
*   input: "INPUT",
*   expectedOutput: "EXPECTED_OUTPUT"
* });
*
* const lengthEvaluator = createCustomEvaluator<typeof dataStructure>(
*   "response-length",
*   (result, data, variables) => {
*     // Split on runs of whitespace and drop empty pieces so that an empty
*     // output counts as 0 words and repeated spaces are not over-counted.
*     const wordCount = result.output.trim().split(/\s+/).filter(Boolean).length;
*     return {
*       score: wordCount,
*       reasoning: `Response contains ${wordCount} words`
*     };
*   },
*   {
*     onEachEntry: {
*       scoreShouldBe: ">=",
*       value: 10
*     },
*     forTestrunOverall: {
*       overallShouldBe: ">=",
*       value: 80,
*       for: "percentageOfPassedResults"
*     }
*   }
* );
*
* @example
* // Boolean evaluator example
* const containsKeywordEvaluator = createCustomEvaluator<typeof dataStructure>(
*   "keyword-checker",
*   (result, data, variables) => {
*     // Compute the case-insensitive match once so the score and the
*     // reasoning can never disagree.
*     const hasKeyword = result.output.toLowerCase().includes("important");
*     return {
*       score: hasKeyword,
*       reasoning: hasKeyword ? "Contains keyword" : "Missing keyword"
*     };
*   },
*   {
*     onEachEntry: {
*       scoreShouldBe: "=",
*       value: true
*     },
*     forTestrunOverall: {
*       overallShouldBe: ">=",
*       value: 75,
*       for: "percentageOfPassedResults"
*     }
*   }
* );
*/
export declare function createCustomEvaluator<T extends DataStructure | undefined = undefined>(name: string, evaluationFunction: LocalEvaluatorType<T>["evaluationFunction"], passFailCriteria: LocalEvaluatorType<T>["passFailCriteria"]): LocalEvaluatorType<T>;
/**
* Creates a builder for combined evaluators that can output multiple evaluator scores under the same evaluation function.
*
* Combined evaluators allow a single evaluation function to return multiple named scores,
* useful when one analysis can produce several metrics. Each named score must have
* corresponding pass/fail criteria.
*
* @template U - Non-empty tuple of string literal types, inferred from the evaluator names passed in
* @param names - Evaluator names, passed as separate arguments (at least one is required), that will be returned by the evaluation function
* @returns Builder object with a `build` method to create the combined evaluator
* @example
* import { createCustomCombinedEvaluatorsFor, createDataStructure } from '@maximai/maxim-js';
*
* const dataStructure = createDataStructure({
*   input: "INPUT",
*   expectedOutput: "EXPECTED_OUTPUT"
* });
*
* const qualityEvaluator = createCustomCombinedEvaluatorsFor("accuracy", "relevance", "fluency")
*   .build<typeof dataStructure>(
*     (result, data, variables) => {
*       // Single function returns multiple scores.
*       // `analyzeText` is a placeholder for your own analysis logic; it is not part of this SDK example's imports.
*       const analysis = analyzeText(result.output);
*       return {
*         accuracy: { score: analysis.factualScore, reasoning: "Fact-checked against sources" },
*         relevance: { score: analysis.topicScore, reasoning: "Relevance to user query" },
*         fluency: { score: analysis.grammarScore, reasoning: "Grammar and readability" }
*       };
*     },
*     {
*       accuracy: {
*         onEachEntry: { scoreShouldBe: ">=", value: 0.8 },
*         forTestrunOverall: { overallShouldBe: ">=", value: 85, for: "average" }
*       },
*       relevance: {
*         onEachEntry: { scoreShouldBe: ">=", value: 0.7 },
*         forTestrunOverall: { overallShouldBe: ">=", value: 80, for: "average" }
*       },
*       fluency: {
*         onEachEntry: { scoreShouldBe: ">=", value: 0.9 },
*         forTestrunOverall: { overallShouldBe: ">=", value: 90, for: "percentageOfPassedResults" }
*       }
*     }
*   );
*
* // Usage in a test run
* maxim.createTestRun("quality-test", "workspace-id")
*   .withEvaluators(qualityEvaluator)
*   .run();
*/
export declare function createCustomCombinedEvaluatorsFor<U extends readonly [string, ...string[]]>(...names: U): {
/**
* Builds the combined evaluator with evaluation function and pass/fail criteria.
*
* @template T - The data structure type for the evaluator, extending DataStructure or undefined (defaults to undefined)
* @param evaluationFunction - Function returning multiple named scores
* @param passFailCriteria - Criteria for each named evaluator
* @returns The configured combined evaluator
* @throws {Error} When passFailCriteria is missing or contains invalid criteria
* @throws {Error} When passFailCriteria contains evaluator names not in the names passed to `createCustomCombinedEvaluatorsFor`
*/
build: <T extends DataStructure | undefined = undefined>(evaluationFunction: CombinedLocalEvaluatorType<T, Record<U[number], PassFailCriteriaType>>["evaluationFunction"], passFailCriteria: CombinedLocalEvaluatorType<T, Record<U[number], PassFailCriteriaType>>["passFailCriteria"]) => CombinedLocalEvaluatorType<T, Record<U[number], PassFailCriteriaType>>;
};