UNPKG

@mastra/core

Version:

Mastra is a framework for building AI-powered applications and agents with a modern TypeScript stack.

291 lines 16 kB
import { z } from 'zod/v4';
import type { MastraModelConfig } from '../llm/model/shared.types.js';
import type { Mastra } from '../mastra/index.js';
import { EntityType } from '../observability/index.js';
import type { CorrelationContext, DefinitionSource, ObservabilityContext, ScorerScoreSource, ScorerStepType, ScorerTargetScope } from '../observability/index.js';
import { RequestContext } from '../request-context/index.js';
import type { PublicSchema } from '../schema/index.js';
import type { ScoringSamplingConfig, ScorerRunInputForAgent, ScorerRunOutputForAgent, Trajectory, TrajectoryExpectation } from './types.js';

/**
 * One entry in a scorer's step list: the step name, its opaque definition,
 * and the `isPromptObject` flag (presumably distinguishing prompt-object
 * steps from function steps; confirm against the implementation).
 */
interface ScorerStepDefinition {
    name: string;
    definition: any;
    isPromptObject: boolean;
}

/**
 * Named shortcuts accepted by `ScorerConfig.type`. Each key fixes the
 * scorer's input and output types without requiring explicit schemas.
 */
type ScorerTypeShortcuts = {
    agent: {
        input: ScorerRunInputForAgent;
        output: ScorerRunOutputForAgent;
    };
    trajectory: {
        input: ScorerRunInputForAgent;
        output: Trajectory;
    };
};

/**
 * Judge settings: a model plus its instructions. Shared by the scorer config
 * and by every prompt-object step shape declared in this module.
 */
type JudgeConfig = {
    model: MastraModelConfig;
    instructions: string;
};

/**
 * Configuration accepted by `createScorer` and the `MastraScorer` constructor.
 */
interface ScorerConfig<TID extends string, TInput = any, TRunOutput = any> {
    id: TID;
    name?: string;
    description: string;
    judge?: JudgeConfig;
    /** Either a shortcut name from `ScorerTypeShortcuts` or an explicit pair of Zod schemas. */
    type?: keyof ScorerTypeShortcuts | {
        input: z.ZodSchema<TInput>;
        output: z.ZodSchema<TRunOutput>;
    };
    /**
     * Transform the scorer run data before the SCORER_RUN span is created.
     * Use this to strip unnecessary data from `input` and `output`, reducing
     * what flows into both the scorer pipeline and the observability span.
     *
     * Runs before any span creation or pipeline execution. The callback may
     * return the transformed run directly or a promise that resolves to it.
     */
    prepareRun?: (run: ScorerRun<TInput, TRunOutput>) => ScorerRun<TInput, TRunOutput> | Promise<ScorerRun<TInput, TRunOutput>>;
}

/**
 * Data describing a single scorer execution. Only `output` is required.
 */
interface ScorerRun<TInput = any, TOutput = any> {
    /** Unique ID for this scorer execution. Generated by scorer.run() when omitted. */
    runId?: string;
    /** Primary scorer input. This is often model input/messages, but can be any structured value. */
    input?: TInput;
    /** Primary scorer target output. This is the required value the scorer evaluates. */
    output: TOutput;
    /** Optional expected label/reference value for judged or supervised evaluations. */
    groundTruth?: any;
    /** Expected trajectory config for trajectory scorers. Flows from dataset items or scorer constructor. */
    expectedTrajectory?: TrajectoryExpectation;
    /** Optional request context forwarded to scorers and judge prompts. */
    requestContext?: Record<string, any> | RequestContext;
    /** What kind of scoring flow produced this score, such as live runs, trace scoring, or experiments. */
    scoreSource?: ScorerScoreSource;
    /**
     * How the scorer interpreted the target data.
     * `span` means a single span's input/output was scored.
     * `trajectory` means a trajectory/path was scored.
     */
    targetScope?: ScorerTargetScope;
    /** Entity type of the scored target when known. */
    targetEntityType?: EntityType;
    /** Trace anchor for the target being scored when available. */
    targetTraceId?: string;
    /** Optional span anchor for the target being scored. */
    targetSpanId?: string;
    /** Live correlation snapshot for the target span/trace when available. */
    targetCorrelationContext?: CorrelationContext;
    /** Live target metadata to merge into emitted score metadata when available. */
    targetMetadata?: Record<string, unknown>;
}

/**
 * Prompt-based step definition: a description, an output schema, an optional
 * judge, and a `createPrompt` callback that returns the prompt text (directly
 * or as a promise).
 */
interface PromptObject<TOutput, TAccumulated extends Record<string, any>, TStepName extends string = string, TInput = any, TRunOutput = any> {
    description: string;
    /**
     * Schema defining the expected output structure.
     * Accepts any schema type supported by Mastra (Zod v4, JSON Schema, AI SDK Schema, or StandardSchema).
     * Will be converted to StandardSchemaWithJSON at runtime via toStandardSchema().
     *
     * The TOutput generic is inferred from this schema's output type.
     */
    outputSchema: PublicSchema<TOutput>;
    judge?: JudgeConfig;
    createPrompt: (context: PromptObjectContext<TAccumulated, TStepName, TInput, TRunOutput>) => string | Promise<string>;
}

/** Key under which a step's result is stored, e.g. `'analyze'` becomes `'analyzeStepResult'`. */
type StepResultKey<T extends string> = `${T}StepResult`;

/**
 * Unwraps one level of `Promise`; non-promise types pass through unchanged.
 * NOTE: within this module this declaration shadows the global `Awaited`
 * utility type.
 */
type Awaited<T> = T extends Promise<infer U> ? U : T;

/**
 * Context passed to step callbacks: optional observability fields plus the
 * current `run` and the `results` accumulated so far.
 */
type StepContext<TAccumulated extends Record<string, any>, TInput, TRunOutput> = Partial<ObservabilityContext> & {
    run: ScorerRun<TInput, TRunOutput>;
    results: TAccumulated;
};

/** Extends the accumulated results `T` with step `K`'s result of type `V` under `${K}StepResult`. */
type AccumulatedResults<T extends Record<string, any>, K extends string, V> = T & Record<StepResultKey<K>, V>;

/**
 * `StepContext` plus a `score` field. `score` takes the type recorded under
 * `generateScoreStepResult`, or `never` when no such result is accumulated.
 */
type GenerateReasonContext<TAccumulated extends Record<string, any>, TInput, TRunOutput> = StepContext<TAccumulated, TInput, TRunOutput> & {
    score: TAccumulated extends Record<'generateScoreStepResult', infer TScore> ? TScore : never;
};

/**
 * Promise type returned by `MastraScorer.run()`: the original run fields,
 * a required `runId`, the score, and optional reason, prompt and step-result
 * fields whose types are derived from the accumulated results.
 */
type ScorerRunResult<TAccumulatedResults extends Record<string, any>, TInput, TRunOutput> = Promise<ScorerRun<TInput, TRunOutput> & {
    scoreTraceId?: string;
    score: TAccumulatedResults extends Record<'generateScoreStepResult', infer TScore> ? TScore : never;
    reason?: TAccumulatedResults extends Record<'generateReasonStepResult', infer TReason> ? TReason : undefined;
    preprocessPrompt?: string;
    analyzePrompt?: string;
    generateScorePrompt?: string;
    generateReasonPrompt?: string;
    preprocessStepResult?: TAccumulatedResults extends Record<'preprocessStepResult', infer TPreprocess> ? TPreprocess : undefined;
    analyzeStepResult?: TAccumulatedResults extends Record<'analyzeStepResult', infer TAnalyze> ? TAnalyze : undefined;
} & {
    runId: string;
}>;

/** Selects `GenerateReasonContext` for the `'generateReason'` step and `StepContext` for every other step name. */
type PromptObjectContext<TAccumulated extends Record<string, any>, TStepName extends string, TInput, TRunOutput> = TStepName extends 'generateReason' ? GenerateReasonContext<TAccumulated, TInput, TRunOutput> : StepContext<TAccumulated, TInput, TRunOutput>;

/** Function-style step returning `TOutput` either directly or via a promise. */
type FunctionStep<TAccumulated extends Record<string, any>, TInput, TRunOutput, TOutput> = ((context: StepContext<TAccumulated, TInput, TRunOutput>) => TOutput) | ((context: StepContext<TAccumulated, TInput, TRunOutput>) => Promise<TOutput>);

/** Function-style `generateReason` step; receives the score-bearing context and may return any value, sync or async. */
type GenerateReasonFunctionStep<TAccumulated extends Record<string, any>, TInput, TRunOutput> = ((context: GenerateReasonContext<TAccumulated, TInput, TRunOutput>) => any) | ((context: GenerateReasonContext<TAccumulated, TInput, TRunOutput>) => Promise<any>);

/** Function-style `generateScore` step; must yield a `number`, sync or async. */
type GenerateScoreFunctionStep<TAccumulated extends Record<string, any>, TInput, TRunOutput> = ((context: StepContext<TAccumulated, TInput, TRunOutput>) => number) | ((context: StepContext<TAccumulated, TInput, TRunOutput>) => Promise<number>);

/** Prompt-style `generateScore` step. Unlike `PromptObject`, it declares no `outputSchema`. */
interface GenerateScorePromptObject<TAccumulated extends Record<string, any>, TInput, TRunOutput> {
    description: string;
    judge?: JudgeConfig;
    createPrompt: (context: StepContext<TAccumulated, TInput, TRunOutput>) => string | Promise<string>;
}

/** Prompt-style `generateReason` step; its prompt builder receives the score-bearing context. */
interface GenerateReasonPromptObject<TAccumulated extends Record<string, any>, TInput, TRunOutput> {
    description: string;
    judge?: JudgeConfig;
    createPrompt: (context: GenerateReasonContext<TAccumulated, TInput, TRunOutput>) => string | Promise<string>;
}

/** Accepted definitions for the `preprocess` step: a function step or a prompt object with an output schema. */
type PreprocessStepDef<TAccumulated extends Record<string, any>, TStepOutput, TInput, TRunOutput> = FunctionStep<TAccumulated, TInput, TRunOutput, TStepOutput> | (PromptObject<TStepOutput, TAccumulated, 'preprocess', TInput, TRunOutput> & {
    outputSchema: PublicSchema<TStepOutput>;
});

/** Accepted definitions for the `analyze` step: a function step or a prompt object with an output schema. */
type AnalyzeStepDef<TAccumulated extends Record<string, any>, TStepOutput, TInput, TRunOutput> = FunctionStep<TAccumulated, TInput, TRunOutput, TStepOutput> | (PromptObject<TStepOutput, TAccumulated, 'analyze', TInput, TRunOutput> & {
    outputSchema: PublicSchema<TStepOutput>;
});
/** Accepted definitions for the `generateScore` step: a function step or a prompt object. */
type GenerateScoreStepDef<TAccumulated extends Record<string, any>, TInput, TRunOutput> = GenerateScoreFunctionStep<TAccumulated, TInput, TRunOutput> | GenerateScorePromptObject<TAccumulated, TInput, TRunOutput>;

/** Accepted definitions for the `generateReason` step: a function step or a prompt object. */
type GenerateReasonStepDef<TAccumulated extends Record<string, any>, TInput, TRunOutput> = GenerateReasonFunctionStep<TAccumulated, TInput, TRunOutput> | GenerateReasonPromptObject<TAccumulated, TInput, TRunOutput>;

/**
 * Scorer with a typed, chainable step API. Each step method returns a scorer
 * type whose `TAccumulatedResults` gains that step's `<step>StepResult` entry,
 * so later steps and `run()` see the results of earlier ones in their types.
 */
declare class MastraScorer<TID extends string = string, TInput = any, TRunOutput = any, TAccumulatedResults extends Record<string, any> = {}> {
    #private;
    config: ScorerConfig<TID, TInput, TRunOutput>;
    private steps;
    private originalPromptObjects;
    /**
     * Tracks whether this scorer was defined in code or loaded from storage.
     * Set by `Mastra.addScorer()` when the `source` option is provided.
     */
    source?: DefinitionSource;
    constructor(
        config: ScorerConfig<TID, TInput, TRunOutput>,
        steps?: ScorerStepDefinition[],
        originalPromptObjects?: Map<string, PromptObject<any, any, any, TInput, TRunOutput> | GenerateReasonPromptObject<any, TInput, TRunOutput> | GenerateScorePromptObject<any, TInput, TRunOutput>>,
        mastra?: Mastra
    );
    /**
     * Registers the Mastra instance with the scorer.
     * This enables access to custom gateways for model resolution.
     * @internal
     */
    __registerMastra(mastra: Mastra): void;
    /**
     * Returns the raw storage configuration this scorer was created from,
     * or undefined if it was created from code.
     */
    toRawConfig(): Record<string, unknown> | undefined;
    /**
     * Sets the raw storage configuration for this scorer.
     * @internal
     */
    __setRawConfig(rawConfig: Record<string, unknown>): void;
    /** The configured scorer type: a shortcut name, an explicit schema pair, or `undefined`. */
    get type(): keyof ScorerTypeShortcuts | {
        input: z.ZodType<TInput, unknown, z.core.$ZodTypeInternals<TInput, unknown>>;
        output: z.ZodType<TRunOutput, unknown, z.core.$ZodTypeInternals<TRunOutput, unknown>>;
    } | undefined;
    get id(): TID;
    get name(): string;
    get description(): string;
    get judge(): {
        model: MastraModelConfig;
        instructions: string;
    } | undefined;
    /** Adds the `preprocess` step; the returned scorer type includes `preprocessStepResult`. */
    preprocess<TPreprocessOutput>(stepDef: PreprocessStepDef<TAccumulatedResults, TPreprocessOutput, TInput, TRunOutput>): MastraScorer<TID, TInput, TRunOutput, AccumulatedResults<TAccumulatedResults, 'preprocess', Awaited<TPreprocessOutput>>>;
    /** Adds the `analyze` step; the returned scorer type includes `analyzeStepResult`. */
    analyze<TAnalyzeOutput>(stepDef: AnalyzeStepDef<TAccumulatedResults, TAnalyzeOutput, TInput, TRunOutput>): MastraScorer<TID, TInput, TRunOutput, AccumulatedResults<TAccumulatedResults, 'analyze', Awaited<TAnalyzeOutput>>>;
    /** Adds the `generateScore` step; the returned scorer type includes `generateScoreStepResult`. */
    generateScore<TScoreOutput extends number = number>(stepDef: GenerateScoreStepDef<TAccumulatedResults, TInput, TRunOutput>): MastraScorer<TID, TInput, TRunOutput, AccumulatedResults<TAccumulatedResults, 'generateScore', Awaited<TScoreOutput>>>;
    /** Adds the `generateReason` step; the returned scorer type includes `generateReasonStepResult`. */
    generateReason<TReasonOutput = string>(stepDef: GenerateReasonStepDef<TAccumulatedResults, TInput, TRunOutput>): MastraScorer<TID, TInput, TRunOutput, AccumulatedResults<TAccumulatedResults, 'generateReason', Awaited<TReasonOutput>>>;
    private get hasGenerateScore();
    private normalizeRunRequestContext;
    /** Executes the scorer for the given run and resolves to the run data combined with the score fields. */
    run(input: ScorerRun<TInput, TRunOutput>): ScorerRunResult<TAccumulatedResults, TInput, TRunOutput>;
    private isPromptObject;
    /** Lists the scorer's steps as name, step type and optional description. */
    getSteps(): {
        name: string;
        type: ScorerStepType;
        description?: string;
    }[];
    private toMastraWorkflow;
    private createScorerContext;
    private executeFunctionStep;
    private executePromptStep;
    private transformToScorerResult;
}

/**
 * Creates a scorer from a shortcut `type` name (`'agent'` or `'trajectory'`);
 * input and output types come from `ScorerTypeShortcuts`.
 */
export declare function createScorer<TID extends string, TType extends keyof ScorerTypeShortcuts>(config: Omit<ScorerConfig<TID, any, any>, 'type'> & {
    type: TType;
}): MastraScorer<TID, ScorerTypeShortcuts[TType]['input'], ScorerTypeShortcuts[TType]['output'], {}>;
/**
 * Creates a scorer from an explicit pair of Zod schemas; input and output
 * types are inferred from the schemas.
 */
export declare function createScorer<TID extends string, TInputSchema extends z.ZodTypeAny, TOutputSchema extends z.ZodTypeAny>(config: Omit<ScorerConfig<TID, z.infer<TInputSchema>, z.infer<TOutputSchema>>, 'type'> & {
    type: {
        input: TInputSchema;
        output: TOutputSchema;
    };
}): MastraScorer<TID, z.infer<TInputSchema>, z.infer<TOutputSchema>, {}>;
/**
 * Creates a scorer from a plain config, with input and output types supplied
 * through the `TInput` / `TRunOutput` type arguments.
 */
export declare function createScorer<TInput = any, TRunOutput = any, TID extends string = string>(config: ScorerConfig<TID, TInput, TRunOutput>): MastraScorer<TID, TInput, TRunOutput, {}>;

/** A scorer paired with its optional sampling configuration. */
export type MastraScorerEntry = {
    scorer: MastraScorer<any, any, any, any>;
    sampling?: ScoringSamplingConfig;
};

/** Scorer entries keyed by string. */
export type MastraScorers = Record<string, MastraScorerEntry>;

/**
 * Known MastraMessagePart type values. The literal members provide
 * autocomplete for common types, while the trailing `string & {}` member
 * keeps the union open to any other string (for example custom `data-*`
 * part types).
 */
export type MastraPartType =
    | 'text'
    | 'tool-invocation'
    | 'step-start'
    | 'reasoning'
    | 'image'
    | 'file'
    | 'source'
    | 'source-document'
    | 'data-'
    | 'data-om-'
    | 'data-om-status'
    | 'data-om-observation-start'
    | 'data-om-observation-end'
    | 'data-om-observation-failed'
    | 'data-om-buffering-start'
    | 'data-om-buffering-end'
    | 'data-om-buffering-failed'
    | 'data-om-activation'
    | 'data-om-thread-update'
    | 'data-workspace-'
    | 'data-workspace-metadata'
    | 'data-sandbox-'
    | 'data-sandbox-stdout'
    | 'data-sandbox-stderr'
    | 'data-sandbox-exit'
    | 'data-sandbox-command'
    | 'data-tool-'
    | 'data-tool-call-approval'
    | 'data-tool-call-suspended'
    | 'data-system-reminder'
    | 'data-tripwire'
    | 'data-structured-output'
    | (string & {});

/** Declarative options consumed by `filterRun`. */
export interface FilterRunOptions {
    /**
     * Keep only messages whose parts match these MastraMessagePart type patterns.
     * Applied to both `input.rememberedMessages` and `output` when they contain
     * MastraDBMessage arrays (the `type: 'agent'` scorer shape).
     *
     * Each entry is prefix-matched against `MastraMessagePart.type`:
     * - `'text'` — text parts
     * - `'tool-invocation'` — tool invocation parts
     * - `'step-start'` — step markers
     * - `'data-'` — all data parts (OM, workspace, sandbox, etc.)
     * - `'data-om-'` — only observational memory data parts
     *
     * Messages where no part matches are dropped. Plain text messages (user text,
     * assistant text without tool parts) are always kept regardless of this filter.
     *
     * To filter by specific tool names, use `toolNames` instead.
     */
    partTypes?: MastraPartType[];
    /**
     * Keep only tool-invocation messages for these specific tools.
     * Each entry is prefix-matched against `toolInvocation.toolName`.
     * Non-tool messages (text, data) are unaffected by this filter.
     *
     * @example `['execute_command', 'write_file', 'string_replace']`
     */
    toolNames?: string[];
    /**
     * Maximum number of messages to keep in `input.rememberedMessages`.
     * Taken from the end (most recent messages). Useful for limiting context window.
     */
    maxRememberedMessages?: number;
    /**
     * Maximum number of messages to keep in `output` (response messages).
     * Taken from the end.
     */
    maxOutputMessages?: number;
    /**
     * Drop `requestContext` entirely from the run.
     */
    dropRequestContext?: boolean;
    /**
     * Drop `expectedTrajectory` from the run.
     */
    dropExpectedTrajectory?: boolean;
    /**
     * Drop `groundTruth` from the run.
     */
    dropGroundTruth?: boolean;
}

/**
 * Creates a `prepareRun` function from declarative options.
 * Use this with `createScorer({ prepareRun: filterRun({ ... }) })`.
 *
 * @example
 * ```ts
 * createScorer({
 *   id: 'my-scorer',
 *   description: '...',
 *   type: 'agent',
 *   prepareRun: filterRun({
 *     toolNames: ['execute_command', 'write_file', 'string_replace_lsp'],
 *     maxRememberedMessages: 20,
 *   }),
 * })
 * ```
 */
export declare function filterRun<TInput = unknown, TOutput = unknown>(options: FilterRunOptions): (run: ScorerRun<TInput, TOutput>) => ScorerRun<TInput, TOutput>;

export type { ScorerConfig, ScorerRun, PromptObject };
export { MastraScorer };
//# sourceMappingURL=base.d.ts.map