UNPKG

@mastra/core

Version:

Mastra is a framework for building AI-powered applications and agents with a modern TypeScript stack.

95 lines 4.15 kB
import type { CoreMessage } from '../../_types/@internal_ai-sdk-v4/dist/index.js';
import type { Agent, AgentExecutionOptions, AiMessageType, UIMessageWithMetadata } from '../../agent/index.js';
import type { ObservabilityContext } from '../../observability/index.js';
import type { RequestContext } from '../../request-context/index.js';
import type { WorkflowResult, WorkflowRunStartOptions } from '../../workflows/types.js';
import type { AnyWorkflow } from '../../workflows/workflow.js';
import { Workflow } from '../../workflows/workflow.js';
import type { MastraScorer } from '../base.js';

/** Options for starting a workflow run, extended with an optional `initialState`. */
type WorkflowRunOptions = WorkflowRunStartOptions & {
  initialState?: any;
};

/** A list of scorers with all four type arguments left open. */
type ScorerList = MastraScorer<any, any, any, any>[];

/** Flat record of scorer output values (presumably keyed by scorer id — confirm against the implementation). */
type ScoreMap = Record<string, any>;

/** Input shapes accepted by a data item when the target is an `Agent`. */
type AgentEvalInput = string | string[] | CoreMessage[] | AiMessageType[] | UIMessageWithMetadata[];

/** Agent execution options, minus the keys that callers of `runEvals` may not supply. */
type AgentTargetOptions = Omit<AgentExecutionOptions<any>, 'scorers' | 'returnScorerData' | 'requestContext'>;

/** The awaited return value of `Agent['generate']`. */
type AgentTargetResult = Awaited<ReturnType<Agent['generate']>>;

/** A workflow result with all four type arguments left open. */
type WorkflowTargetResult = WorkflowResult<any, any, any, any>;

/**
 * One entry of the evaluation data set.
 *
 * The type of `input` depends on the target: `any` for a workflow, one of the
 * `AgentEvalInput` shapes for an agent, and `unknown` otherwise. Every field of
 * `ObservabilityContext` may optionally be supplied as well.
 */
type RunEvalsDataItem<TTarget = unknown> = {
  input: TTarget extends Workflow<any, any> ? any : TTarget extends Agent ? AgentEvalInput : unknown;
  groundTruth?: any;
  expectedTrajectory?: any;
  requestContext?: RequestContext;
  startOptions?: WorkflowRunOptions;
} & Partial<ObservabilityContext>;

export type WorkflowScorerConfig = {
  /** Scorers that evaluate the overall workflow input/output */
  workflow?: ScorerList;
  /** Scorers that evaluate individual workflow steps by step ID */
  steps?: Record<string, ScorerList>;
  /** Scorers that evaluate the workflow's step execution trajectory */
  trajectory?: ScorerList;
};

export type AgentScorerConfig = {
  /** Scorers that evaluate the full agent input/output */
  agent?: ScorerList;
  /** Scorers that evaluate the agent's tool call trajectory */
  trajectory?: ScorerList;
};

/** Value the promise returned by `runEvals` resolves to. */
type RunEvalsResult = {
  scores: ScoreMap;
  summary: {
    totalItems: number;
  };
};

/**
 * Common shape of the `config` argument shared by every `runEvals` overload.
 *
 * @typeParam TTarget - The agent or workflow being evaluated.
 * @typeParam TScorers - Either a flat scorer list or a categorised scorer config.
 * @typeParam TOptions - Options type accepted for the target.
 * @typeParam TTargetResult - Type of `targetResult` handed to `onItemComplete`.
 * @typeParam TScorerResults - Type of `scorerResults` handed to `onItemComplete`.
 */
type RunEvalsConfig<TTarget, TScorers, TOptions, TTargetResult, TScorerResults> = {
  data: RunEvalsDataItem<TTarget>[];
  scorers: TScorers;
  target: TTarget;
  targetOptions?: TOptions;
  /** Optional callback; may be async. NOTE(review): the name suggests it fires once per data item — confirm. */
  onItemComplete?: (params: {
    item: RunEvalsDataItem<TTarget>;
    targetResult: TTargetResult;
    scorerResults: TScorerResults;
  }) => void | Promise<void>;
  /** NOTE(review): presumably a cap on items processed in parallel — confirm against the implementation. */
  concurrency?: number;
};

/** Overload: agent target with a flat scorer list; `scorerResults` is a flat record. */
export declare function runEvals<TAgent extends Agent>(
  config: RunEvalsConfig<TAgent, ScorerList, AgentTargetOptions, AgentTargetResult, ScoreMap>,
): Promise<RunEvalsResult>;

/** Overload: workflow target with a flat scorer list; `scorerResults` is a flat record. */
export declare function runEvals<TWorkflow extends AnyWorkflow>(
  config: RunEvalsConfig<TWorkflow, ScorerList, WorkflowRunOptions, WorkflowTargetResult, ScoreMap>,
): Promise<RunEvalsResult>;

/** Overload: workflow target with a `WorkflowScorerConfig`; `scorerResults` is grouped by `workflow`, `steps` and `trajectory`. */
export declare function runEvals<TWorkflow extends AnyWorkflow>(
  config: RunEvalsConfig<
    TWorkflow,
    WorkflowScorerConfig,
    WorkflowRunOptions,
    WorkflowTargetResult,
    {
      workflow?: ScoreMap;
      steps?: Record<string, ScoreMap>;
      trajectory?: ScoreMap;
    }
  >,
): Promise<RunEvalsResult>;

/** Overload: agent target with an `AgentScorerConfig`; `scorerResults` is grouped by `agent` and `trajectory`. */
export declare function runEvals<TAgent extends Agent>(
  config: RunEvalsConfig<
    TAgent,
    AgentScorerConfig,
    AgentTargetOptions,
    AgentTargetResult,
    {
      agent?: ScoreMap;
      trajectory?: ScoreMap;
    }
  >,
): Promise<RunEvalsResult>;

export {};
//# sourceMappingURL=index.d.ts.map