UNPKG

@mastra/core

Version:

Mastra is a framework for building AI-powered applications and agents with a modern TypeScript stack.

653 lines (649 loc) • 21.2 kB
import { createStep, createWorkflow, Agent, tryGenerateWithJsonFallback, Workflow } from './chunk-LAQQETGP.js';
import { resolveModelConfig } from './chunk-VXHOOZSK.js';
import { MastraError } from './chunk-PZUZNPFM.js';
import { randomUUID } from 'crypto';
import { z } from 'zod';

/**
 * Builder and runner for a scoring pipeline.
 *
 * Each builder method (`preprocess`, `analyze`, `generateScore`,
 * `generateReason`) returns a NEW scorer with one more step appended; the
 * receiver is left untouched. `run()` turns the recorded steps into a workflow,
 * executes it, and flattens the outcome into a single result object.
 *
 * A step definition is either a function (called with the scorer context) or a
 * "prompt object" (see `isPromptObject`) that is sent to a judge agent.
 */
// NOTE(review): `var` and the inner `_MastraScorer` name are kept as emitted by
// the bundler so the exported binding and `MastraScorer.name` stay unchanged.
var MastraScorer = class _MastraScorer {
  /**
   * @param {object} config - Scorer config; `name`, `description`, `judge` and `type` are read from it.
   * @param {Array<{name: string, definition: any, isPromptObject: boolean}>} [steps] - Steps recorded so far.
   * @param {Map<string, object>} [originalPromptObjects] - Prompt objects keyed by step name.
   */
  constructor(config, steps = [], originalPromptObjects = /* @__PURE__ */ new Map()) {
    this.config = config;
    this.steps = steps;
    this.originalPromptObjects = originalPromptObjects;
  }

  get type() {
    return this.config.type;
  }

  get name() {
    return this.config.name;
  }

  get description() {
    return this.config.description;
  }

  get judge() {
    return this.config.judge;
  }

  /**
   * Appends a "preprocess" step.
   * @param {Function|object} stepDef - Function step or prompt object.
   * @returns {MastraScorer} A new scorer including the step.
   */
  preprocess(stepDef) {
    // Unlike the other steps, preprocess has always kept the raw definition on
    // the step record even for prompt objects; preserved for compatibility.
    return appendScorerStep(this, "preprocess", stepDef, true);
  }

  /**
   * Appends an "analyze" step.
   * @param {Function|object} stepDef - Function step or prompt object.
   * @returns {MastraScorer} A new scorer including the step.
   */
  analyze(stepDef) {
    return appendScorerStep(this, "analyze", stepDef, false);
  }

  /**
   * Appends the "generateScore" step (required before `run()`).
   * @param {Function|object} stepDef - Function step or prompt object.
   * @returns {MastraScorer} A new scorer including the step.
   */
  generateScore(stepDef) {
    return appendScorerStep(this, "generateScore", stepDef, false);
  }

  /**
   * Appends a "generateReason" step.
   * @param {Function|object} stepDef - Function step or prompt object.
   * @returns {MastraScorer} A new scorer including the step.
   */
  generateReason(stepDef) {
    return appendScorerStep(this, "generateReason", stepDef, false);
  }

  /** True when a "generateScore" step has been recorded. */
  get hasGenerateScore() {
    return this.steps.some((step) => step.name === "generateScore");
  }

  /**
   * Executes the recorded steps as a workflow and returns the flattened result.
   *
   * @param {object} input - Run input; `runId` and `tracingContext` are read
   *   from it, and the whole object is forwarded to the steps as `run`.
   * @returns {Promise<object>} See `transformToScorerResult`.
   * @throws {MastraError} When no generateScore step exists, or when the
   *   workflow finishes with status "failed".
   */
  async run(input) {
    const describeSteps = () => this.steps.map((s) => s.name).join(", ");

    if (!this.hasGenerateScore) {
      throw new MastraError({
        id: "MASTR_SCORER_FAILED_TO_RUN_MISSING_GENERATE_SCORE",
        domain: "SCORER" /* SCORER */,
        category: "USER" /* USER */,
        text: `Cannot execute pipeline without generateScore() step`,
        details: {
          scorerId: this.config.name,
          steps: describeSteps()
        }
      });
    }

    const { tracingContext } = input;
    // Any falsy runId (including "") is replaced by a fresh UUID.
    const runId = input.runId || randomUUID();
    const run = { ...input, runId };

    const workflow = this.toMastraWorkflow();
    const workflowRun = await workflow.createRunAsync();
    const workflowResult = await workflowRun.start({
      inputData: { run },
      tracingContext
    });

    if (workflowResult.status === "failed") {
      throw new MastraError({
        id: "MASTR_SCORER_FAILED_TO_RUN_WORKFLOW_FAILED",
        domain: "SCORER" /* SCORER */,
        category: "USER" /* USER */,
        text: `Scorer Run Failed: ${workflowResult.error}`,
        details: {
          scorerId: this.config.name,
          steps: describeSteps()
        }
      });
    }

    return this.transformToScorerResult({ workflowResult, originalInput: run });
  }

  /**
   * Tells whether a step definition is a prompt object, i.e. a non-null object
   * carrying both `description` and `createPrompt` (an `outputSchema` is
   * optional and does not affect the answer).
   *
   * @param {*} stepDef - Candidate step definition.
   * @returns {boolean}
   */
  isPromptObject(stepDef) {
    // `typeof null === "object"`, and `"x" in null` throws, so rule null out first.
    return typeof stepDef === "object" && stepDef !== null && "description" in stepDef && "createPrompt" in stepDef;
  }

  /**
   * Summarises the recorded steps.
   * @returns {Array<{name: string, type: "prompt"|"function", description: (string|undefined)}>}
   */
  getSteps() {
    return this.steps.map((step) => {
      // Prompt steps other than preprocess store no definition on the step
      // record, so the description has to come from the saved prompt object.
      const source = step.isPromptObject
        ? (this.originalPromptObjects.get(step.name) ?? step.definition)
        : step.definition;
      return {
        name: step.name,
        type: step.isPromptObject ? "prompt" : "function",
        description: source?.description
      };
    });
  }

  /**
   * Builds a committed workflow with one workflow step per scorer step,
   * chained in recording order. Each step receives the previous step's
   * `accumulatedResults` / `generatedPrompts` and returns extended copies.
   *
   * @returns {object} The value returned by `commit()` on the chained workflow.
   */
  toMastraWorkflow() {
    const workflowSteps = this.steps.map((scorerStep) => {
      return createStep({
        id: scorerStep.name,
        description: `Scorer step: ${scorerStep.name}`,
        inputSchema: z.any(),
        outputSchema: z.any(),
        execute: async ({ inputData, getInitData, tracingContext }) => {
          // The first step receives the workflow input, which has neither key,
          // hence the empty-object defaults.
          const { accumulatedResults = {}, generatedPrompts = {} } = inputData;
          const { run } = getInitData();
          const context = this.createScorerContext(scorerStep.name, run, accumulatedResults);

          let stepResult;
          let newGeneratedPrompts = generatedPrompts;
          if (scorerStep.isPromptObject) {
            const { result, prompt } = await this.executePromptStep(scorerStep, tracingContext, context);
            stepResult = result;
            newGeneratedPrompts = {
              ...generatedPrompts,
              [`${scorerStep.name}Prompt`]: prompt
            };
          } else {
            stepResult = await this.executeFunctionStep(scorerStep, context);
          }

          const newAccumulatedResults = {
            ...accumulatedResults,
            [`${scorerStep.name}StepResult`]: stepResult
          };
          return {
            stepResult,
            accumulatedResults: newAccumulatedResults,
            generatedPrompts: newGeneratedPrompts
          };
        }
      });
    });

    const workflow = createWorkflow({
      id: `scorer-${this.config.name}`,
      description: this.config.description,
      inputSchema: z.object({
        run: z.any() // ScorerRun
      }),
      outputSchema: z.object({
        run: z.any(),
        score: z.number(),
        reason: z.string().optional(),
        preprocessResult: z.any().optional(),
        analyzeResult: z.any().optional(),
        preprocessPrompt: z.string().optional(),
        analyzePrompt: z.string().optional(),
        generateScorePrompt: z.string().optional(),
        generateReasonPrompt: z.string().optional()
      }),
      options: {
        // mark all spans generated as part of the scorer workflow internal
        tracingPolicy: {
          internal: 15 /* ALL */
        }
      }
    });

    let chainedWorkflow = workflow;
    for (const step of workflowSteps) {
      chainedWorkflow = chainedWorkflow.then(step);
    }
    return chainedWorkflow.commit();
  }

  /**
   * Builds the context handed to a step. Only "generateReason" additionally
   * receives `score`, taken from the generateScore step's result.
   *
   * @param {string} stepName
   * @param {object} run
   * @param {object} accumulatedResults - Results keyed as `<stepName>StepResult`.
   * @returns {{run: object, results: object, score?: *}}
   */
  createScorerContext(stepName, run, accumulatedResults) {
    if (stepName === "generateReason") {
      const score = accumulatedResults.generateScoreStepResult;
      return { run, results: accumulatedResults, score };
    }
    return { run, results: accumulatedResults };
  }

  /** Calls a function step's definition with the context and returns its result. */
  async executeFunctionStep(scorerStep, context) {
    return await scorerStep.definition(context);
  }

  /**
   * Runs a prompt-object step through a judge agent.
   *
   * The model and instructions come from the prompt object's own `judge`, with
   * the scorer-level `judge` as fallback. What is requested from the judge
   * depends on the step name:
   *   - "generateScore": structured output `{ score: number }`; returns the number.
   *   - "generateReason": plain generation; returns the text.
   *   - anything else: structured output using the prompt object's
   *     `outputSchema`; returns the object.
   *
   * @param {{name: string}} scorerStep
   * @param {*} tracingContext - Forwarded untouched to the judge calls.
   * @param {object} context - Passed to the prompt object's `createPrompt`.
   * @returns {Promise<{result: *, prompt: *}>}
   * @throws {Error} When no prompt object is stored for the step.
   * @throws {MastraError} When no model or no instructions can be resolved.
   */
  async executePromptStep(scorerStep, tracingContext, context) {
    const originalStep = this.originalPromptObjects.get(scorerStep.name);
    if (!originalStep) {
      throw new Error(`Step "${scorerStep.name}" is not a prompt object`);
    }

    // The prompt is built before the judge configuration is validated (original order).
    const prompt = await originalStep.createPrompt(context);
    const modelConfig = originalStep.judge?.model ?? this.config.judge?.model;
    const instructions = originalStep.judge?.instructions ?? this.config.judge?.instructions;
    if (!modelConfig || !instructions) {
      throw new MastraError({
        id: "MASTR_SCORER_FAILED_TO_RUN_MISSING_MODEL_OR_INSTRUCTIONS",
        domain: "SCORER" /* SCORER */,
        category: "USER" /* USER */,
        text: `Step "${scorerStep.name}" requires a model and instructions`,
        details: {
          scorerId: this.config.name,
          step: scorerStep.name
        }
      });
    }

    const resolvedModel = await resolveModelConfig(modelConfig);
    const judge = new Agent({
      name: "judge",
      model: resolvedModel,
      instructions,
      options: { tracingPolicy: { internal: 15 /* ALL */ } }
    });
    // Models reporting specificationVersion "v2" use the newer generate path;
    // everything else goes through generateLegacy.
    const isV2 = resolvedModel.specificationVersion === "v2";

    if (scorerStep.name === "generateScore") {
      const schema = z.object({ score: z.number() });
      const result = isV2
        ? await tryGenerateWithJsonFallback(judge, prompt, {
            structuredOutput: { schema },
            tracingContext
          })
        : await judge.generateLegacy(prompt, {
            output: schema,
            tracingContext
          });
      return { result: result.object.score, prompt };
    }

    if (scorerStep.name === "generateReason") {
      const result = isV2
        ? await judge.generate(prompt, { tracingContext })
        : await judge.generateLegacy(prompt, { tracingContext });
      return { result: result.text, prompt };
    }

    const result = isV2
      ? await tryGenerateWithJsonFallback(judge, prompt, {
          structuredOutput: { schema: originalStep.outputSchema },
          tracingContext
        })
      : await judge.generateLegacy(prompt, {
          output: originalStep.outputSchema,
          tracingContext
        });
    return { result: result.object, prompt };
  }

  /**
   * Flattens the workflow's final output into the scorer result: the original
   * input plus score, reason, per-step results and the prompts that were
   * generated. Missing entries come through as `undefined`.
   *
   * @param {{workflowResult: object, originalInput: object}} args
   * @returns {object}
   */
  transformToScorerResult({ workflowResult, originalInput }) {
    const finalStepResult = workflowResult.result;
    const accumulatedResults = finalStepResult?.accumulatedResults || {};
    const generatedPrompts = finalStepResult?.generatedPrompts || {};
    return {
      ...originalInput,
      score: accumulatedResults.generateScoreStepResult,
      generateScorePrompt: generatedPrompts.generateScorePrompt,
      reason: accumulatedResults.generateReasonStepResult,
      generateReasonPrompt: generatedPrompts.generateReasonPrompt,
      preprocessStepResult: accumulatedResults.preprocessStepResult,
      preprocessPrompt: generatedPrompts.preprocessPrompt,
      analyzeStepResult: accumulatedResults.analyzeStepResult,
      analyzePrompt: generatedPrompts.analyzePrompt
    };
  }
};

/**
 * Shared implementation of the four builder methods: returns a new scorer
 * with `stepDef` appended under `name`.
 *
 * The prompt-object map is copied BEFORE the new entry is added, so the scorer
 * the builder method was called on is never modified.
 *
 * @param {object} scorer - Scorer to extend (not mutated).
 * @param {string} name - Step name.
 * @param {*} stepDef - Function step or prompt object.
 * @param {boolean} keepPromptDefinition - When true, a prompt object is also
 *   kept as the step record's `definition`; otherwise prompt steps store
 *   `undefined` there.
 * @returns {object} New scorer instance.
 */
function appendScorerStep(scorer, name, stepDef, keepPromptDefinition) {
  const isPromptObj = scorer.isPromptObject(stepDef);
  const promptObjects = new Map(scorer.originalPromptObjects);
  if (isPromptObj) {
    promptObjects.set(name, stepDef);
  }
  const definition = isPromptObj && !keepPromptDefinition ? void 0 : stepDef;
  return new MastraScorer(
    scorer.config,
    [...scorer.steps, { name, definition, isPromptObject: isPromptObj }],
    promptObjects
  );
}

/**
 * Creates an empty scorer (no steps yet) from `name`, `description`, `judge`
 * and `type`; any other keys on `config` are ignored.
 *
 * @param {{name: *, description: *, judge: *, type: *}} config
 * @returns {MastraScorer}
 */
function createScorer(config) {
  return new MastraScorer({
    name: config.name,
    description: config.description,
    judge: config.judge,
    type: config.type
  });
}

// src/scores/run-experiment/scorerAccumulator.ts
/**
 * Collects `score` values per scorer across experiment items and averages
 * them. Three independent buckets are kept: flat (agent-style results),
 * workflow-level, and per-step.
 */
var ScoreAccumulator = class {
  flatScores = {};
  workflowScores = {};
  stepScores = {};

  /**
   * Records one item's scorer results. Results that carry a `steps` key are
   * treated as nested workflow results; everything else as a flat
   * scorer-name -> result map.
   */
  addScores(scorerResults) {
    const isTargetWorkflowAndHasStepScores = "steps" in scorerResults;
    if (isTargetWorkflowAndHasStepScores) {
      this.addNestedScores(scorerResults);
    } else {
      this.addFlatScores(scorerResults);
    }
  }

  /** Records `result.score` for every entry of a scorer-name -> result map. */
  addFlatScores(scorerResults) {
    for (const [scorerName, result] of Object.entries(scorerResults)) {
      pushScore(this.flatScores, scorerName, result.score);
    }
  }

  /** Records the `workflow` and `steps` parts of a nested result, when present. */
  addNestedScores(scorerResults) {
    if ("workflow" in scorerResults && scorerResults.workflow) {
      for (const [scorerName, result] of Object.entries(scorerResults.workflow)) {
        pushScore(this.workflowScores, scorerName, result.score);
      }
    }
    if ("steps" in scorerResults && scorerResults.steps) {
      this.addStepScores(scorerResults.steps);
    }
  }

  /** Records scores from a step-id -> (scorer-name -> result) map. */
  addStepScores(stepScorerResults) {
    for (const [stepId, stepResults] of Object.entries(stepScorerResults)) {
      const stepBucket = this.stepScores[stepId] ??= {};
      for (const [scorerName, result] of Object.entries(stepResults)) {
        pushScore(stepBucket, scorerName, result.score);
      }
    }
  }

  /**
   * @returns {object} Flat averages keyed by scorer name at the top level, plus
   *   `workflow` and `steps` sub-objects when those buckets are non-empty.
   */
  getAverageScores() {
    const result = {};
    for (const [scorerName, scoreArray] of Object.entries(this.flatScores)) {
      result[scorerName] = this.getAverageScore(scoreArray);
    }
    if (Object.keys(this.workflowScores).length > 0) {
      result.workflow = {};
      for (const [scorerName, scoreArray] of Object.entries(this.workflowScores)) {
        result.workflow[scorerName] = this.getAverageScore(scoreArray);
      }
    }
    if (Object.keys(this.stepScores).length > 0) {
      result.steps = {};
      for (const [stepId, stepScorers] of Object.entries(this.stepScores)) {
        result.steps[stepId] = {};
        for (const [scorerName, scoreArray] of Object.entries(stepScorers)) {
          result.steps[stepId][scorerName] = this.getAverageScore(scoreArray);
        }
      }
    }
    return result;
  }

  /** Arithmetic mean of `scoreArray`; 0 for an empty array. */
  getAverageScore(scoreArray) {
    if (scoreArray.length === 0) {
      return 0;
    }
    return scoreArray.reduce((a, b) => a + b, 0) / scoreArray.length;
  }
};

/** Appends `score` to `bucket[key]`, creating the array on first use. */
function pushScore(bucket, key, score) {
  (bucket[key] ??= []).push(score);
}

// src/scores/run-experiment/index.ts
/**
 * Runs `target` over every item in `data`, scores each result, and returns
 * the per-scorer averages.
 *
 * @param {object} config
 * @param {Array<object>} config.data - Items; each must have an `input` key.
 * @param {Array<object>|object} config.scorers - Scorer array, or a
 *   `{ workflow, steps }` config for workflow targets.
 * @param {object} config.target - A `Workflow` instance; anything else is
 *   treated as an agent.
 * @param {Function} [config.onItemComplete] - Awaited after each item with
 *   `{ item, targetResult, scorerResults }`.
 * @param {number} [config.concurrency=1] - Passed to p-map.
 * @returns {Promise<{scores: object, summary: {totalItems: number}}>}
 * @throws {MastraError} On invalid inputs, target failure, or scorer failure.
 */
async function runExperiment(config) {
  const { data, scorers, target, onItemComplete, concurrency = 1 } = config;
  validateExperimentInputs(data, scorers, target);

  let totalItems = 0;
  const scoreAccumulator = new ScoreAccumulator();
  // p-map is loaded lazily via dynamic import.
  const pMap = (await import('p-map')).default;
  await pMap(
    data,
    async (item) => {
      const targetResult = await executeTarget(target, item);
      const scorerResults = await runScorers(scorers, targetResult, item);
      scoreAccumulator.addScores(scorerResults);
      if (onItemComplete) {
        await onItemComplete({ item, targetResult, scorerResults });
      }
      totalItems++;
    },
    { concurrency }
  );

  return {
    scores: scoreAccumulator.getAverageScores(),
    summary: { totalItems }
  };
}

/** True when `target` is an instance of the imported `Workflow` class. */
function isWorkflow(target) {
  return target instanceof Workflow;
}

/** True for a non-array object that has a `workflow` or a `steps` key. */
function isWorkflowScorerConfig(scorers) {
  return typeof scorers === "object" && !Array.isArray(scorers) && ("workflow" in scorers || "steps" in scorers);
}

/**
 * Validates experiment inputs before any work is done.
 *
 * @param {Array<object>} data
 * @param {Array<object>|object} scorers
 * @param {object} target
 * @throws {MastraError} When data is empty, an item lacks `input`, no scorer
 *   is supplied, or a non-workflow target is given a non-array scorer config.
 */
function validateExperimentInputs(data, scorers, target) {
  if (data.length === 0) {
    throw new MastraError({
      domain: "SCORER",
      id: "RUN_EXPERIMENT_FAILED_NO_DATA_PROVIDED",
      category: "USER",
      text: "Failed to run experiment: Data array is empty"
    });
  }

  for (let i = 0; i < data.length; i++) {
    const item = data[i];
    if (!item || typeof item !== "object" || !("input" in item)) {
      throw new MastraError({
        domain: "SCORER",
        id: "INVALID_DATA_ITEM",
        category: "USER",
        text: `Invalid data item at index ${i}: must have 'input' properties`
      });
    }
  }

  if (Array.isArray(scorers)) {
    if (scorers.length === 0) {
      throw new MastraError({
        domain: "SCORER",
        id: "NO_SCORERS_PROVIDED",
        category: "USER",
        text: "At least one scorer must be provided"
      });
    }
  } else if (isWorkflow(target) && isWorkflowScorerConfig(scorers)) {
    const hasScorers = scorers.workflow && scorers.workflow.length > 0 || scorers.steps && Object.keys(scorers.steps).length > 0;
    if (!hasScorers) {
      throw new MastraError({
        domain: "SCORER",
        id: "NO_SCORERS_PROVIDED",
        category: "USER",
        text: "At least one workflow or step scorer must be provided"
      });
    }
  } else if (!isWorkflow(target)) {
    // `scorers` is known to be a non-array on this path.
    throw new MastraError({
      domain: "SCORER",
      id: "INVALID_AGENT_SCORERS",
      category: "USER",
      text: "Agent scorers must be an array of scorers"
    });
  }
  // NOTE(review): a workflow target with a non-array `scorers` that has neither
  // a `workflow` nor a `steps` key passes validation without an error.
}

/**
 * Runs the target for one item, dispatching on workflow vs. agent.
 * @throws {MastraError} Wrapping any error raised by the target.
 */
async function executeTarget(target, item) {
  try {
    if (isWorkflow(target)) {
      return await executeWorkflow(target, item);
    }
    return await executeAgent(target, item);
  } catch (error) {
    throw new MastraError(
      {
        domain: "SCORER",
        id: "RUN_EXPERIMENT_TARGET_FAILED_TO_GENERATE_RESULT",
        category: "USER",
        text: "Failed to run experiment: Error generating result from target",
        details: {
          item: JSON.stringify(item)
        }
      },
      error
    );
  }
}

/**
 * Runs a workflow target with its own scorers disabled and packages the
 * outcome as `{ scoringData: { input, output, stepResults } }`. `output` is
 * `undefined` unless the workflow status is "success".
 */
async function executeWorkflow(target, item) {
  const run = await target.createRunAsync({ disableScorers: true });
  const workflowResult = await run.start({
    inputData: item.input,
    runtimeContext: item.runtimeContext
  });
  return {
    scoringData: {
      input: item.input,
      output: workflowResult.status === "success" ? workflowResult.result : void 0,
      stepResults: workflowResult.steps
    }
  };
}

/**
 * Runs an agent target with an empty scorer set and `returnScorerData: true`,
 * using `generate` for models reporting specificationVersion "v2" and
 * `generateLegacy` otherwise.
 */
async function executeAgent(agent, item) {
  const model = await agent.getModel();
  const options = {
    scorers: {},
    returnScorerData: true,
    runtimeContext: item.runtimeContext
  };
  if (model.specificationVersion === "v2") {
    return await agent.generate(item.input, options);
  }
  return await agent.generateLegacy(item.input, options);
}

/**
 * Runs the configured scorers against one target result.
 *
 * @param {Array<object>|object} scorers - Scorer array, or `{ workflow, steps }`.
 * @param {object} targetResult - Result carrying `scoringData`.
 * @param {object} item - Data item; `groundTruth`, `runtimeContext` and
 *   `tracingContext` are forwarded to each scorer.
 * @returns {Promise<object>} For an array: scorer-name -> result. Otherwise an
 *   object with optional `workflow` and `steps` keys (each omitted when empty).
 * @throws {MastraError} Wrapping a scorer failure in the array and step paths.
 */
async function runScorers(scorers, targetResult, item) {
  const scorerResults = {};
  // Fields every scorer.run() call shares, in the original key order after input/output.
  const sharedRunFields = () => ({
    groundTruth: item.groundTruth,
    runtimeContext: item.runtimeContext,
    tracingContext: item.tracingContext
  });

  if (Array.isArray(scorers)) {
    for (const scorer of scorers) {
      try {
        const score = await scorer.run({
          input: targetResult.scoringData?.input,
          output: targetResult.scoringData?.output,
          ...sharedRunFields()
        });
        scorerResults[scorer.name] = score;
      } catch (error) {
        throw new MastraError(
          {
            domain: "SCORER",
            id: "RUN_EXPERIMENT_SCORER_FAILED_TO_SCORE_RESULT",
            category: "USER",
            text: `Failed to run experiment: Error running scorer ${scorer.name}`,
            details: {
              scorerName: scorer.name,
              item: JSON.stringify(item)
            }
          },
          error
        );
      }
    }
    return scorerResults;
  }

  if (scorers.workflow) {
    const workflowScorerResults = {};
    // NOTE(review): unlike the other two paths, errors from workflow-level
    // scorers propagate as-is rather than being wrapped in a MastraError.
    for (const scorer of scorers.workflow) {
      const score = await scorer.run({
        input: targetResult.scoringData.input,
        output: targetResult.scoringData.output,
        ...sharedRunFields()
      });
      workflowScorerResults[scorer.name] = score;
    }
    if (Object.keys(workflowScorerResults).length > 0) {
      scorerResults.workflow = workflowScorerResults;
    }
  }

  if (scorers.steps) {
    const stepScorerResults = {};
    for (const [stepId, stepScorers] of Object.entries(scorers.steps)) {
      const stepResult = targetResult.scoringData.stepResults?.[stepId];
      // Steps are scored only when they succeeded AND have a truthy payload and
      // output. NOTE(review): a falsy-but-valid output (0, "", false) is skipped.
      if (!(stepResult?.status === "success" && stepResult.payload && stepResult.output)) {
        continue;
      }
      const stepResults = {};
      for (const scorer of stepScorers) {
        try {
          const score = await scorer.run({
            input: stepResult.payload,
            output: stepResult.output,
            ...sharedRunFields()
          });
          stepResults[scorer.name] = score;
        } catch (error) {
          throw new MastraError(
            {
              domain: "SCORER",
              id: "RUN_EXPERIMENT_SCORER_FAILED_TO_SCORE_STEP_RESULT",
              category: "USER",
              text: `Failed to run experiment: Error running scorer ${scorer.name} on step ${stepId}`,
              details: {
                scorerName: scorer.name,
                stepId
              }
            },
            error
          );
        }
      }
      if (Object.keys(stepResults).length > 0) {
        stepScorerResults[stepId] = stepResults;
      }
    }
    if (Object.keys(stepScorerResults).length > 0) {
      scorerResults.steps = stepScorerResults;
    }
  }

  return scorerResults;
}

export { MastraScorer, createScorer, runExperiment };
//# sourceMappingURL=chunk-MRSBLBQ5.js.map