/*
 * @maximai/maxim-js
 * Version:
 * Maxim AI JS SDK. Visit https://getmaxim.ai for more info.
 * 263 lines (262 loc) • 7.63 kB
 * TypeScript
 */
import type { Data, DataStructure, DataValue } from "../models/dataset";
import type { CombinedLocalEvaluatorType, HumanEvaluationConfig, LocalEvaluationResult, LocalEvaluatorType, PassFailCriteriaType } from "./evaluator";
/**
 * Set of logging callbacks a test run can be configured with
 * (this is the type of `TestRunConfig.logger`).
 *
 * @typeParam T - Data structure describing the dataset entries; defaults to `undefined`.
 */
export interface TestRunLogger<T extends DataStructure | undefined = undefined> {
/** Callback that receives a plain message string (informational channel, per its name). */
info: (message: string) => void;
/** Callback that receives a plain message string (error channel, per its name). */
error: (message: string) => void;
/**
 * Callback that receives a message plus a payload holding the dataset entry
 * and, when available, the yielded output and local evaluation results.
 * NOTE(review): presumably invoked once per processed dataset entry — confirm against the runner.
 */
processed: (message: string, data: {
datasetEntry: Data<T>;
output?: YieldedOutput;
evaluationResults?: LocalEvaluationResult[];
}) => void;
}
/**
 * Value produced for one dataset entry; this is what a test run's
 * `outputFunction` returns (directly or through a promise).
 *
 * - `data`: the output as a string.
 * - `retrievedContextToEvaluate`: optional context, given as one string or a list of strings.
 * - `meta`: optional usage and cost figures. `usage` is either a token
 *   breakdown (latency optional) or a record carrying only a latency.
 *
 * NOTE(review): units of `latency` and `cost` are not stated in this file — confirm.
 */
export type YieldedOutput = {
  data: string;
  retrievedContextToEvaluate?: string | Array<string>;
  meta?: {
    usage?:
      | {
          promptTokens: number;
          completionTokens: number;
          totalTokens: number;
          latency?: number;
        }
      | { latency: number };
    cost?: { input: number; output: number; total: number };
  };
};
/**
 * Result of a test run: a link plus a list of result records.
 *
 * Each record carries a name, a per-evaluator score map keyed by string, and
 * optional usage, cost and latency aggregates. A score entry is either
 * numeric (with an optional `outOf`) or a boolean/string value, and may
 * additionally carry a `pass` flag.
 *
 * NOTE(review): the map key is presumably the evaluator name, and units of
 * `latency`/`cost` are not stated in this file — confirm.
 */
export type TestRunResult = {
  link: string;
  result: Array<{
    name: string;
    individualEvaluatorMeanScore: Record<
      string,
      { pass?: boolean } & (
        | { score: number; outOf?: number }
        | { score: boolean | string }
      )
    >;
    usage?: { total: number; input: number; completion: number };
    cost?: { total: number; input: number; completion: number };
    latency?: {
      min: number;
      max: number;
      p50: number;
      p90: number;
      p95: number;
      p99: number;
      mean: number;
      standardDeviation: number;
      total: number;
    };
  }>;
};
/**
 * Full configuration of a test run; this is what `TestRunBuilder.getConfig`
 * returns.
 *
 * Required: `baseUrl`, `apiKey`, `workspaceId`, `name` and `evaluators`.
 * Each evaluator is a local evaluator, a combined local evaluator, or a
 * string. `promptVersion`, `promptChainVersion` and `workflow` share one
 * shape: an `id` plus an optional `contextToEvaluate` string.
 *
 * NOTE(review): string evaluators are presumably names of platform-side
 * evaluators, and `outputFunction`, `promptVersion`, `promptChainVersion`
 * and `workflow` look like alternative output sources — confirm.
 *
 * @typeParam T - Data structure describing the dataset entries; defaults to `undefined`.
 */
export type TestRunConfig<T extends DataStructure | undefined = undefined> = {
  baseUrl: string;
  apiKey: string;
  workspaceId: string;
  name: string;
  testConfigId?: string;
  dataStructure?: T;
  data?: DataValue<T>;
  evaluators: Array<
    | LocalEvaluatorType<T>
    | CombinedLocalEvaluatorType<T, Record<string, PassFailCriteriaType>>
    | string
  >;
  humanEvaluationConfig?: HumanEvaluationConfig;
  outputFunction?: (data: Data<T>) => YieldedOutput | Promise<YieldedOutput>;
  promptVersion?: { id: string; contextToEvaluate?: string };
  promptChainVersion?: { id: string; contextToEvaluate?: string };
  workflow?: { id: string; contextToEvaluate?: string };
  logger?: TestRunLogger<T>;
  concurrency?: number;
};
/**
 * Chainable builder for a test run. Every `with*`/`yieldsOutput` member
 * returns a `TestRunBuilder`, so calls can be chained; `getConfig` exposes the
 * accumulated `TestRunConfig` and `run` starts the run.
 *
 * @typeParam T - Data structure describing the dataset entries; defaults to `undefined`.
 */
export type TestRunBuilder<T extends DataStructure | undefined = undefined> = {
/** Accepts a data structure `U` and returns a builder re-typed to `TestRunBuilder<U>`. */
withDataStructure: <U extends DataStructure>(dataStructure: U) => TestRunBuilder<U>;
/** Accepts the same value type as `TestRunConfig<T>["data"]`. */
withData: (data: TestRunConfig<T>["data"]) => TestRunBuilder<T>;
/** Accepts evaluators variadically; each argument is one element of `TestRunConfig<T>["evaluators"]`. */
withEvaluators: (...evaluators: TestRunConfig<T>["evaluators"]) => TestRunBuilder<T>;
/** Accepts a `HumanEvaluationConfig`. */
withHumanEvaluationConfig: (humanEvaluationConfig: HumanEvaluationConfig) => TestRunBuilder<T>;
/** Accepts the same function type as `TestRunConfig<T>["outputFunction"]`. */
yieldsOutput: (outputFunction: TestRunConfig<T>["outputFunction"]) => TestRunBuilder<T>;
/** Accepts a prompt version id and an optional `contextToEvaluate` string. */
withPromptVersionId: (id: string, contextToEvaluate?: string) => TestRunBuilder<T>;
/** Accepts a prompt chain version id and an optional `contextToEvaluate` string. */
withPromptChainVersionId: (id: string, contextToEvaluate?: string) => TestRunBuilder<T>;
/** Accepts a workflow id and an optional `contextToEvaluate` string. */
withWorkflowId: (id: string, contextToEvaluate?: string) => TestRunBuilder<T>;
/** Accepts the same value type as `TestRunConfig<T>["logger"]`. */
withLogger: (logger: TestRunConfig<T>["logger"]) => TestRunBuilder<T>;
/** Accepts the same value type as `TestRunConfig<T>["concurrency"]`. */
withConcurrency: (concurrency: TestRunConfig<T>["concurrency"]) => TestRunBuilder<T>;
/** Returns the `TestRunConfig<T>` held by this builder. */
getConfig: () => TestRunConfig<T>;
/**
 * Runs the test run and resolves with the `TestRunResult` and a list of entry indices.
 * NOTE(review): `timeoutInMinutes` default and what happens on timeout are not visible here;
 * `failedEntryIndices` presumably indexes into the supplied data — confirm.
 */
run: (timeoutInMinutes?: number) => Promise<{
testRunResult: TestRunResult;
failedEntryIndices: number[];
}>;
};
/**
 * Response shape for creating a test run (per the type name): either a
 * `data` object describing the run, or an `error` object with a message.
 *
 * Consumers can tell the two apart by checking which of `data` / `error`
 * is present.
 */
export type MaximAPICreateTestRunResponse =
  | {
      data: {
        id: string;
        workspaceId: string;
        humanEvaluationConfig?: {
          emails: Array<string>;
          instructions: string;
          requester: string;
        };
        // Left as `unknown`: its structure is not described by this type.
        evalConfig: unknown;
        parentTestRunId?: string;
      };
    }
  | { error: { message: string } };
/**
 * Payload shape for pushing a single test run entry (per the type name).
 *
 * It bundles three parts: `testRun` (the run's identifiers and
 * configuration), an optional `runConfig` (usage and cost figures, with
 * snake_case token field names) and the `entry` itself.
 *
 * @typeParam T - Declared for parity with the other test-run types; it is
 *   not referenced anywhere in this type's body.
 */
export type MaximAPITestRunEntryPushPayload<T extends DataStructure | undefined = undefined> = {
  testRun: {
    id: string;
    datasetEntryId?: string;
    datasetId?: string;
    workspaceId: string;
    humanEvaluationConfig?: {
      emails: Array<string>;
      instructions: string;
      requester: string;
    };
    // Left as `unknown`: its structure is not described by this type.
    evalConfig: unknown;
    parentTestRunId?: string;
  };
  runConfig?: {
    usage?:
      | {
          prompt_tokens: number;
          completion_tokens: number;
          total_tokens: number;
          latency?: number;
        }
      | { latency?: number };
    cost?: { input: number; output: number; total: number };
  };
  entry: MaximAPITestRunEntry;
};
/**
 * One test run entry as carried in `MaximAPITestRunEntryPushPayload.entry`.
 *
 * Only `dataEntry` is required: a string-keyed map whose values are a
 * string, a list of strings, `null` or `undefined`. When present, each
 * local evaluation result is extended with an `id`.
 *
 * NOTE(review): `dataEntry` presumably holds the raw dataset row keyed by
 * column name — confirm.
 */
export type MaximAPITestRunEntry = {
  input?: string;
  expectedOutput?: string;
  contextToEvaluate?: string | Array<string>;
  output?: string;
  dataEntry: Record<string, string | Array<string> | null | undefined>;
  localEvaluationResults?: Array<LocalEvaluationResult & { id: string }>;
};
/**
 * Response shape for a test run status query (per the type name): either a
 * `data` object with per-entry counters and the overall run status, or an
 * `error` object with a message.
 *
 * `testRunStatus` is one of five string literals: QUEUED, RUNNING, FAILED,
 * COMPLETE or STOPPED.
 */
export type MaximAPITestRunStatusResponse =
  | {
      data: {
        entryStatus: {
          total: number;
          running: number;
          completed: number;
          failed: number;
          queued: number;
          stopped: number;
        };
        testRunStatus: "QUEUED" | "RUNNING" | "FAILED" | "COMPLETE" | "STOPPED";
      };
    }
  | { error: { message: string } };
/**
 * Response shape for fetching a test run's result (per the type name):
 * either `data` holding a `TestRunResult`, or an `error` object with a
 * message.
 */
export type MaximAPITestRunResultResponse =
  | { data: TestRunResult }
  | { error: { message: string } };
/**
 * Request payload shape for executing a workflow against one data entry
 * (per the type name).
 *
 * `dataEntry` is a string-keyed map whose values are a string, a list of
 * strings, `null` or `undefined`; `contextToEvaluate` is optional.
 */
export type MaximAPITestRunEntryExecuteWorkflowForDataPayload = {
  workflowId: string;
  dataEntry: Record<string, string | Array<string> | null | undefined>;
  contextToEvaluate?: string;
};
/**
 * Response shape for executing a workflow against one data entry (per the
 * type name): either `data` with an optional output, an optional context
 * and a required latency, or an `error` object with a message.
 *
 * NOTE(review): the unit of `latency` is not stated in this file — confirm.
 */
export type MaximAPITestRunEntryExecuteWorkflowForDataResponse =
  | {
      data: {
        output?: string;
        contextToEvaluate?: string;
        latency: number;
      };
    }
  | { error: { message: string } };
/**
 * Request payload shape for executing a prompt version against one data
 * entry (per the type name).
 *
 * `promptVersionId` and `input` are required; `dataEntry` (a string-keyed
 * map of string, string list, `null` or `undefined` values) and
 * `contextToEvaluate` are optional.
 */
export type MaximAPITestRunEntryExecutePromptForDataPayload = {
  promptVersionId: string;
  input: string;
  dataEntry?: Record<string, string | Array<string> | null | undefined>;
  contextToEvaluate?: string;
};
/**
 * Response shape for executing a prompt version against one data entry
 * (per the type name): either `data` with optional output, context, token
 * usage and cost, or an `error` object with a message.
 *
 * NOTE(review): units of `latency` and `cost` are not stated in this file — confirm.
 */
export type MaximAPITestRunEntryExecutePromptForDataResponse =
  | {
      data: {
        output?: string;
        contextToEvaluate?: string;
        usage?: {
          promptTokens: number;
          completionTokens: number;
          totalTokens: number;
          latency?: number;
        };
        cost?: { input: number; output: number; total: number };
      };
    }
  | { error: { message: string } };
/**
 * Request payload shape for executing a prompt chain version against one
 * data entry (per the type name).
 *
 * `promptChainVersionId` and `input` are required; `dataEntry` (a
 * string-keyed map of string, string list, `null` or `undefined` values)
 * and `contextToEvaluate` are optional.
 */
export type MaximAPITestRunEntryExecutePromptChainForDataPayload = {
  promptChainVersionId: string;
  input: string;
  dataEntry?: Record<string, string | Array<string> | null | undefined>;
  contextToEvaluate?: string;
};
/**
 * Response shape for executing a prompt chain version against one data
 * entry (per the type name): either `data` with optional output, context,
 * token usage and cost, or an `error` object with a message.
 *
 * NOTE(review): units of `latency` and `cost` are not stated in this file — confirm.
 */
export type MaximAPITestRunEntryExecutePromptChainForDataResponse =
  | {
      data: {
        output?: string;
        contextToEvaluate?: string;
        usage?: {
          promptTokens: number;
          completionTokens: number;
          totalTokens: number;
          latency?: number;
        };
        cost?: { input: number; output: number; total: number };
      };
    }
  | { error: { message: string } };