@maximai/maxim-js

import type { Data, DataStructure, DataValue } from "../models/dataset";
import type {
  CombinedLocalEvaluatorType,
  HumanEvaluationConfig,
  LocalEvaluationResult,
  LocalEvaluatorType,
  PassFailCriteriaType,
} from "./evaluator";

/**
 * Set of logging callbacks that a test run can be configured with
 * (see `TestRunConfig.logger`).
 */
export interface TestRunLogger<T extends DataStructure | undefined = undefined> {
  /** Receives an informational message. */
  info: (message: string) => void;
  /** Receives an error message. */
  error: (message: string) => void;
  /**
   * Receives a message together with the dataset entry it refers to and,
   * when available, the yielded output and the local evaluation results.
   */
  processed: (
    message: string,
    data: {
      datasetEntry: Data<T>;
      output?: YieldedOutput;
      evaluationResults?: LocalEvaluationResult[];
    },
  ) => void;
}

/**
 * Value produced by an output function for a single dataset entry.
 */
export type YieldedOutput = {
  /** The output text. */
  data: string;
  /** Optional retrieved context, as one string or a list of strings. */
  retrievedContextToEvaluate?: string | string[];
  /** Optional usage and cost metadata. */
  meta?: {
    /** Either full token counts (latency optional) or latency alone. */
    usage?:
      | {
          promptTokens: number;
          completionTokens: number;
          totalTokens: number;
          latency?: number;
        }
      | {
          latency: number;
        };
    cost?: {
      input: number;
      output: number;
      total: number;
    };
  };
};

/**
 * Outcome of a test run: a link plus a list of named result summaries.
 */
export type TestRunResult = {
  link: string;
  result: Array<{
    name: string;
    /**
     * Keyed by string. Each value carries an optional `pass` flag and a
     * score that is either numeric (with optional `outOf`) or a
     * boolean/string.
     */
    individualEvaluatorMeanScore: {
      [key: string]: {
        pass?: boolean;
      } & (
        | {
            score: number;
            outOf?: number;
          }
        | {
            score: boolean | string;
          }
      );
    };
    usage?: {
      total: number;
      input: number;
      completion: number;
    };
    cost?: {
      total: number;
      input: number;
      completion: number;
    };
    latency?: {
      min: number;
      max: number;
      p50: number;
      p90: number;
      p95: number;
      p99: number;
      mean: number;
      standardDeviation: number;
      total: number;
    };
  }>;
};

/**
 * Full configuration of a test run. `T` describes the data structure of the
 * dataset entries and defaults to `undefined`.
 */
export type TestRunConfig<T extends DataStructure | undefined = undefined> = {
  baseUrl: string;
  apiKey: string;
  workspaceId: string;
  name: string;
  testConfigId?: string;
  dataStructure?: T;
  data?: DataValue<T>;
  /** Local evaluators, combined local evaluators, or plain strings. */
  evaluators: Array<
    | LocalEvaluatorType<T>
    | CombinedLocalEvaluatorType<T, Record<string, PassFailCriteriaType>>
    | string
  >;
  humanEvaluationConfig?: HumanEvaluationConfig;
  /** Maps a dataset entry to its output; may be synchronous or asynchronous. */
  outputFunction?: (data: Data<T>) => YieldedOutput | Promise<YieldedOutput>;
  promptVersion?: {
    id: string;
    contextToEvaluate?: string;
  };
  promptChainVersion?: {
    id: string;
    contextToEvaluate?: string;
  };
  workflow?: {
    id: string;
    contextToEvaluate?: string;
  };
  logger?: TestRunLogger<T>;
  concurrency?: number;
};

/**
 * Fluent builder for a test run. Every `with…`/`yieldsOutput` member returns
 * a `TestRunBuilder`, so calls can be chained.
 */
export type TestRunBuilder<T extends DataStructure | undefined = undefined> = {
  /** Returns a builder re-typed to the supplied data structure `U`. */
  withDataStructure: <U extends DataStructure>(dataStructure: U) => TestRunBuilder<U>;
  withData: (data: TestRunConfig<T>["data"]) => TestRunBuilder<T>;
  withEvaluators: (...evaluators: TestRunConfig<T>["evaluators"]) => TestRunBuilder<T>;
  withHumanEvaluationConfig: (humanEvaluationConfig: HumanEvaluationConfig) => TestRunBuilder<T>;
  yieldsOutput: (outputFunction: TestRunConfig<T>["outputFunction"]) => TestRunBuilder<T>;
  withPromptVersionId: (id: string, contextToEvaluate?: string) => TestRunBuilder<T>;
  withPromptChainVersionId: (id: string, contextToEvaluate?: string) => TestRunBuilder<T>;
  withWorkflowId: (id: string, contextToEvaluate?: string) => TestRunBuilder<T>;
  withLogger: (logger: TestRunConfig<T>["logger"]) => TestRunBuilder<T>;
  withConcurrency: (concurrency: TestRunConfig<T>["concurrency"]) => TestRunBuilder<T>;
  /** Returns the configuration as a `TestRunConfig<T>`. */
  getConfig: () => TestRunConfig<T>;
  /**
   * Resolves with the test run result and a list of failed entry indices.
   * The optional argument is a timeout; going by its name it is expressed
   * in minutes.
   */
  run: (timeoutInMinutes?: number) => Promise<{
    testRunResult: TestRunResult;
    failedEntryIndices: number[];
  }>;
};

/**
 * Response shape for creating a test run: either a `data` object or an
 * `error` with a message.
 */
export type MaximAPICreateTestRunResponse =
  | {
      data: {
        id: string;
        workspaceId: string;
        humanEvaluationConfig?: {
          emails: string[];
          instructions: string;
          requester: string;
        };
        evalConfig: unknown;
        parentTestRunId?: string;
      };
    }
  | {
      error: {
        message: string;
      };
    };

/**
 * Payload shape for pushing one test run entry. Token-usage keys under
 * `runConfig.usage` are snake_case here, unlike the camelCase keys of
 * `YieldedOutput`.
 *
 * NOTE(review): the type parameter `T` is not referenced in the body; it is
 * kept so existing `MaximAPITestRunEntryPushPayload<...>` usages still compile.
 */
export type MaximAPITestRunEntryPushPayload<T extends DataStructure | undefined = undefined> = {
  testRun: {
    id: string;
    datasetEntryId?: string;
    datasetId?: string;
    workspaceId: string;
    humanEvaluationConfig?: {
      emails: string[];
      instructions: string;
      requester: string;
    };
    evalConfig: unknown;
    parentTestRunId?: string;
  };
  runConfig?: {
    usage?:
      | {
          prompt_tokens: number;
          completion_tokens: number;
          total_tokens: number;
          latency?: number;
        }
      | {
          latency?: number;
        };
    cost?: {
      input: number;
      output: number;
      total: number;
    };
  };
  entry: MaximAPITestRunEntry;
};

/**
 * A single test run entry. Only `dataEntry` is required.
 */
export type MaximAPITestRunEntry = {
  input?: string;
  expectedOutput?: string;
  contextToEvaluate?: string | string[];
  output?: string;
  dataEntry: Record<string, string | string[] | null | undefined>;
  /** Local evaluation results, each extended with an `id`. */
  localEvaluationResults?: Array<
    LocalEvaluationResult & {
      id: string;
    }
  >;
};

/**
 * Response shape for a test run status query: per-entry counters plus an
 * overall status, or an `error` with a message.
 */
export type MaximAPITestRunStatusResponse =
  | {
      data: {
        entryStatus: {
          total: number;
          running: number;
          completed: number;
          failed: number;
          queued: number;
          stopped: number;
        };
        testRunStatus: "QUEUED" | "RUNNING" | "FAILED" | "COMPLETE" | "STOPPED";
      };
    }
  | {
      error: {
        message: string;
      };
    };

/**
 * Response shape carrying a `TestRunResult`, or an `error` with a message.
 */
export type MaximAPITestRunResultResponse =
  | {
      data: TestRunResult;
    }
  | {
      error: {
        message: string;
      };
    };

/**
 * Payload shape for executing a workflow against one data entry.
 */
export type MaximAPITestRunEntryExecuteWorkflowForDataPayload = {
  workflowId: string;
  dataEntry: Record<string, string | string[] | null | undefined>;
  contextToEvaluate?: string;
};

/**
 * Response shape for a workflow execution: optional output and context plus
 * a required latency, or an `error` with a message.
 */
export type MaximAPITestRunEntryExecuteWorkflowForDataResponse =
  | {
      data: {
        output?: string;
        contextToEvaluate?: string;
        latency: number;
      };
    }
  | {
      error: {
        message: string;
      };
    };

/**
 * Payload shape for executing a prompt version against one input.
 */
export type MaximAPITestRunEntryExecutePromptForDataPayload = {
  promptVersionId: string;
  input: string;
  dataEntry?: Record<string, string | string[] | null | undefined>;
  contextToEvaluate?: string;
};

/**
 * Response shape for a prompt execution: optional output, context, usage and
 * cost, or an `error` with a message.
 */
export type MaximAPITestRunEntryExecutePromptForDataResponse =
  | {
      data: {
        output?: string;
        contextToEvaluate?: string;
        usage?: {
          promptTokens: number;
          completionTokens: number;
          totalTokens: number;
          latency?: number;
        };
        cost?: {
          input: number;
          output: number;
          total: number;
        };
      };
    }
  | {
      error: {
        message: string;
      };
    };

/**
 * Payload shape for executing a prompt chain version against one input.
 */
export type MaximAPITestRunEntryExecutePromptChainForDataPayload = {
  promptChainVersionId: string;
  input: string;
  dataEntry?: Record<string, string | string[] | null | undefined>;
  contextToEvaluate?: string;
};

/**
 * Response shape for a prompt chain execution: optional output, context,
 * usage and cost, or an `error` with a message.
 */
export type MaximAPITestRunEntryExecutePromptChainForDataResponse =
  | {
      data: {
        output?: string;
        contextToEvaluate?: string;
        usage?: {
          promptTokens: number;
          completionTokens: number;
          totalTokens: number;
          latency?: number;
        };
        cost?: {
          input: number;
          output: number;
          total: number;
        };
      };
    }
  | {
      error: {
        message: string;
      };
    };