@mastra/core
Version:
Mastra is a framework for building AI-powered applications and agents with a modern TypeScript stack.
95 lines • 4.15 kB
TypeScript
import type { CoreMessage } from '../../_types/@internal_ai-sdk-v4/dist/index.js';
import type { Agent, AgentExecutionOptions, AiMessageType, UIMessageWithMetadata } from '../../agent/index.js';
import type { ObservabilityContext } from '../../observability/index.js';
import type { RequestContext } from '../../request-context/index.js';
import type { WorkflowResult, WorkflowRunStartOptions } from '../../workflows/types.js';
import type { AnyWorkflow } from '../../workflows/workflow.js';
import { Workflow } from '../../workflows/workflow.js';
import type { MastraScorer } from '../base.js';
/**
 * Options for starting a workflow run from the eval runner.
 *
 * Carries every field of `WorkflowRunStartOptions` plus an optional
 * `initialState`. Used in this file as the type of `startOptions` on a data
 * item and of `targetOptions` in the workflow overloads of `runEvals`.
 */
type WorkflowRunOptions = WorkflowRunStartOptions & {
  /**
   * Optional initial state value. Typed as `any` here, so no shape is
   * enforced at this level.
   * NOTE(review): presumably forwarded to the workflow run — confirm against
   * the implementation.
   */
  initialState?: any;
};
/**
 * A single entry of the `data` array handed to `runEvals`.
 *
 * The accepted `input` type follows the target type `TTarget`:
 * - for a `Workflow` target, `input` is `any`;
 * - for an `Agent` target, `input` is a string, an array of strings, or an
 *   array of `CoreMessage`, `AiMessageType` or `UIMessageWithMetadata`;
 * - otherwise (including the default `TTarget = unknown`), `input` is `unknown`.
 *
 * Each item may additionally carry any of the `ObservabilityContext` fields,
 * all of them optional.
 */
type RunEvalsDataItem<TTarget = unknown> = {
  /** Input for the target; its type is selected by `TTarget` as described above. */
  input: TTarget extends Workflow<any, any>
    ? any
    : TTarget extends Agent
      ? string | string[] | CoreMessage[] | AiMessageType[] | UIMessageWithMetadata[]
      : unknown;
  /**
   * Optional, untyped reference value for this item.
   * NOTE(review): presumably the expected answer that scorers compare
   * against — confirm against the scorer implementations.
   */
  groundTruth?: any;
  /**
   * Optional, untyped expected trajectory for this item.
   * NOTE(review): presumably consumed by the `trajectory` scorers — confirm.
   */
  expectedTrajectory?: any;
  /** Optional request context supplied for this item. */
  requestContext?: RequestContext;
  /**
   * Optional workflow start options supplied for this item.
   * NOTE(review): presumably only relevant for workflow targets — confirm.
   */
  startOptions?: WorkflowRunOptions;
} & Partial<ObservabilityContext>;
/**
 * Scorer configuration for a workflow target, grouped by what is scored.
 * Every group is optional.
 */
export type WorkflowScorerConfig = {
  /** Scorers applied to the workflow as a whole (its overall input/output). */
  workflow?: Array<MastraScorer<any, any, any, any>>;
  /** Scorers applied to individual workflow steps, keyed by step ID. */
  steps?: Record<string, Array<MastraScorer<any, any, any, any>>>;
  /** Scorers applied to the workflow's step execution trajectory. */
  trajectory?: Array<MastraScorer<any, any, any, any>>;
};
/**
 * Scorer configuration for an agent target, grouped by what is scored.
 * Every group is optional.
 */
export type AgentScorerConfig = {
  /** Scorers applied to the agent's full input/output. */
  agent?: Array<MastraScorer<any, any, any, any>>;
  /** Scorers applied to the agent's tool call trajectory. */
  trajectory?: Array<MastraScorer<any, any, any, any>>;
};
/**
 * Value that every `runEvals` overload resolves to.
 */
type RunEvalsResult = {
  /**
   * Score values keyed by string. The values are typed as `any`, so their
   * shape is not described here.
   * NOTE(review): keys are presumably scorer identifiers — confirm against
   * the implementation.
   */
  scores: Record<string, any>;
  /** Summary information about the run. */
  summary: {
    /**
     * Item count for the run.
     * NOTE(review): presumably the number of data items evaluated — confirm.
     */
    totalItems: number;
  };
};
/**
 * `runEvals` overload: agent target with a flat list of scorers.
 *
 * @param config - Evaluation settings; see the individual fields.
 * @returns A promise resolving to the scores and run summary.
 */
export declare function runEvals<TAgent extends Agent>(config: {
  /** Items to evaluate; `input` is typed for an agent target. */
  data: Array<RunEvalsDataItem<TAgent>>;
  /** Scorers supplied as a single flat list. */
  scorers: Array<MastraScorer<any, any, any, any>>;
  /** The agent under evaluation. */
  target: TAgent;
  /**
   * Agent execution options, with `scorers`, `returnScorerData` and
   * `requestContext` removed from the accepted keys.
   */
  targetOptions?: Omit<AgentExecutionOptions<any>, 'scorers' | 'returnScorerData' | 'requestContext'>;
  /**
   * Optional callback; may be synchronous or return a promise.
   * NOTE(review): presumably invoked once per finished data item — confirm.
   */
  onItemComplete?: (params: {
    /** The data item this call refers to. */
    item: RunEvalsDataItem<TAgent>;
    /** Typed as the awaited return type of `Agent['generate']`. */
    targetResult: Awaited<ReturnType<Agent['generate']>>;
    /** Scorer results keyed by string (values untyped). */
    scorerResults: Record<string, any>;
  }) => void | Promise<void>;
  /**
   * Optional numeric setting.
   * NOTE(review): presumably the number of items processed in parallel — confirm.
   */
  concurrency?: number;
}): Promise<RunEvalsResult>;
/**
 * `runEvals` overload: workflow target with a flat list of scorers.
 *
 * @param config - Evaluation settings; see the individual fields.
 * @returns A promise resolving to the scores and run summary.
 */
export declare function runEvals<TWorkflow extends AnyWorkflow>(config: {
  /** Items to evaluate; `input` is typed for a workflow target. */
  data: Array<RunEvalsDataItem<TWorkflow>>;
  /** Scorers supplied as a single flat list. */
  scorers: Array<MastraScorer<any, any, any, any>>;
  /** The workflow under evaluation. */
  target: TWorkflow;
  /** Optional workflow run options (start options plus `initialState`). */
  targetOptions?: WorkflowRunOptions;
  /**
   * Optional callback; may be synchronous or return a promise.
   * NOTE(review): presumably invoked once per finished data item — confirm.
   */
  onItemComplete?: (params: {
    /** The data item this call refers to. */
    item: RunEvalsDataItem<TWorkflow>;
    /** Typed as a `WorkflowResult`. */
    targetResult: WorkflowResult<any, any, any, any>;
    /** Scorer results keyed by string (values untyped). */
    scorerResults: Record<string, any>;
  }) => void | Promise<void>;
  /**
   * Optional numeric setting.
   * NOTE(review): presumably the number of items processed in parallel — confirm.
   */
  concurrency?: number;
}): Promise<RunEvalsResult>;
/**
 * `runEvals` overload: workflow target with scorers grouped in a
 * `WorkflowScorerConfig` (workflow-level, per-step and trajectory scorers).
 *
 * @param config - Evaluation settings; see the individual fields.
 * @returns A promise resolving to the scores and run summary.
 */
export declare function runEvals<TWorkflow extends AnyWorkflow>(config: {
  /** Items to evaluate; `input` is typed for a workflow target. */
  data: Array<RunEvalsDataItem<TWorkflow>>;
  /** Scorers grouped by workflow, step ID and trajectory. */
  scorers: WorkflowScorerConfig;
  /** The workflow under evaluation. */
  target: TWorkflow;
  /** Optional workflow run options (start options plus `initialState`). */
  targetOptions?: WorkflowRunOptions;
  /**
   * Optional callback; may be synchronous or return a promise.
   * NOTE(review): presumably invoked once per finished data item — confirm.
   */
  onItemComplete?: (params: {
    /** The data item this call refers to. */
    item: RunEvalsDataItem<TWorkflow>;
    /** Typed as a `WorkflowResult`. */
    targetResult: WorkflowResult<any, any, any, any>;
    /** Scorer results, using the same group names as `WorkflowScorerConfig`. */
    scorerResults: {
      /** Results for the `workflow` group, keyed by string. */
      workflow?: Record<string, any>;
      /**
       * Results for the `steps` group as a two-level string-keyed map.
       * NOTE(review): outer key is presumably the step ID — confirm.
       */
      steps?: Record<string, Record<string, any>>;
      /** Results for the `trajectory` group, keyed by string. */
      trajectory?: Record<string, any>;
    };
  }) => void | Promise<void>;
  /**
   * Optional numeric setting.
   * NOTE(review): presumably the number of items processed in parallel — confirm.
   */
  concurrency?: number;
}): Promise<RunEvalsResult>;
/**
 * `runEvals` overload: agent target with scorers grouped in an
 * `AgentScorerConfig` (agent-level and trajectory scorers).
 *
 * @param config - Evaluation settings; see the individual fields.
 * @returns A promise resolving to the scores and run summary.
 */
export declare function runEvals<TAgent extends Agent>(config: {
  /** Items to evaluate; `input` is typed for an agent target. */
  data: Array<RunEvalsDataItem<TAgent>>;
  /** Scorers grouped by agent and trajectory. */
  scorers: AgentScorerConfig;
  /** The agent under evaluation. */
  target: TAgent;
  /**
   * Agent execution options, with `scorers`, `returnScorerData` and
   * `requestContext` removed from the accepted keys.
   */
  targetOptions?: Omit<AgentExecutionOptions<any>, 'scorers' | 'returnScorerData' | 'requestContext'>;
  /**
   * Optional callback; may be synchronous or return a promise.
   * NOTE(review): presumably invoked once per finished data item — confirm.
   */
  onItemComplete?: (params: {
    /** The data item this call refers to. */
    item: RunEvalsDataItem<TAgent>;
    /** Typed as the awaited return type of `Agent['generate']`. */
    targetResult: Awaited<ReturnType<Agent['generate']>>;
    /** Scorer results, using the same group names as `AgentScorerConfig`. */
    scorerResults: {
      /** Results for the `agent` group, keyed by string. */
      agent?: Record<string, any>;
      /** Results for the `trajectory` group, keyed by string. */
      trajectory?: Record<string, any>;
    };
  }) => void | Promise<void>;
  /**
   * Optional numeric setting.
   * NOTE(review): presumably the number of items processed in parallel — confirm.
   */
  concurrency?: number;
}): Promise<RunEvalsResult>;
// Explicit empty export. In a declaration file this makes the module's export
// list explicit, so the declarations above that carry no `export` modifier
// (`WorkflowRunOptions`, `RunEvalsDataItem`, `RunEvalsResult`) are not
// implicitly exported.
export {};
//# sourceMappingURL=index.d.ts.map