@mastra/core
Version:
Mastra is a framework for building AI-powered applications and agents with a modern TypeScript stack.
1,538 lines (1,533 loc) • 53.6 kB
JavaScript
import { validateAndSaveScore } from './chunk-RTJNKJR7.js';
import { isSupportedLanguageModel } from './chunk-AM3IOVFX.js';
import { extractTrajectory, extractTrajectoryFromTrace } from './chunk-FIHGLBDA.js';
import { resolveObservabilityContext } from './chunk-4ZCIE3Q5.js';
import { EntityType } from './chunk-QAXRURAT.js';
import { MastraError } from './chunk-FJEVLHJT.js';
import { RequestContext } from './chunk-BBVL3KAA.js';
import { isZodType } from '@mastra/schema-compat';
import { zodToJsonSchema } from '@mastra/schema-compat/zod-to-json';
// src/datasets/experiment/executor.ts
/**
 * Runs a scorer directly against a dataset item (scorer-as-target mode).
 *
 * The scorer receives `item.input`. A score that is not a real number
 * (including NaN) is normalized to `null`, with a warning when the scorer
 * returned something other than `undefined`. Never throws: failures are
 * reported through the `error` field of the returned record.
 *
 * @param scorer - Object exposing `id` and an async `run(input)` method.
 * @param item - Dataset item; only `item.input` is read.
 * @returns `{ output, error, traceId }` where `traceId` is always `null`.
 */
async function executeScorer(scorer, item) {
  try {
    const raw = await scorer.run(item.input);
    const rawScore = raw.score;
    const isUsableScore = typeof rawScore === "number" && !Number.isNaN(rawScore);
    if (!isUsableScore && rawScore !== undefined) {
      console.warn(`Scorer ${scorer.id} returned invalid score: ${rawScore}`);
    }
    const reason = typeof raw.reason === "string" ? raw.reason : null;
    return {
      output: { score: isUsableScore ? rawScore : null, reason },
      error: null,
      // Scorers don't produce traces
      traceId: null
    };
  } catch (failure) {
    const isError = failure instanceof Error;
    return {
      output: null,
      error: {
        message: isError ? failure.message : String(failure),
        stack: isError ? failure.stack : undefined
      },
      traceId: null
    };
  }
}
/**
 * Dispatches a dataset item to the executor matching `targetType`
 * ("agent", "workflow" or "scorer") and normalizes any thrown error into
 * the `{ output: null, error, traceId: null }` result shape.
 *
 * When `options.signal` is supplied, an already-aborted signal short-circuits
 * before execution starts, and a later abort wins the race against the
 * in-flight execution. This function itself never rejects.
 *
 * @param target - Resolved agent, workflow or scorer instance.
 * @param targetType - Kind of target; "processor" and unknown values yield an error result.
 * @param item - Dataset item to execute.
 * @param options - Optional `{ signal, requestContext, experimentId, versions }`.
 * @returns Execution result record produced by the executor, or an error record.
 */
async function executeTarget(target, targetType, item, options) {
  try {
    const abortSignal = options?.signal;
    if (abortSignal?.aborted) {
      throw abortSignal.reason ?? new DOMException("The operation was aborted.", "AbortError");
    }

    let pending;
    if (targetType === "agent") {
      pending = executeAgent(
        target,
        item,
        abortSignal,
        options?.requestContext,
        options?.experimentId,
        options?.versions
      );
    } else if (targetType === "workflow") {
      pending = executeWorkflow(target, item, options?.requestContext);
    } else if (targetType === "scorer") {
      pending = executeScorer(target, item);
    } else if (targetType === "processor") {
      throw new Error(`Target type '${targetType}' not yet supported.`);
    } else {
      throw new Error(`Unknown target type: ${targetType}`);
    }

    // Only race against the signal when the caller actually provided one.
    return await (abortSignal ? raceWithSignal(pending, abortSignal) : pending);
  } catch (failure) {
    const isError = failure instanceof Error;
    return {
      output: null,
      error: {
        message: isError ? failure.message : String(failure),
        stack: isError ? failure.stack : undefined
      },
      traceId: null
    };
  }
}
/**
 * Settles with `promise` unless `signal` aborts first.
 *
 * If the signal is already aborted the returned promise rejects immediately.
 * Otherwise an abort listener is registered; it is removed again as soon as
 * `promise` settles. The rejection value on abort is `signal.reason`, falling
 * back to an "AbortError" DOMException when no reason is set.
 *
 * @param promise - The in-flight operation.
 * @param signal - AbortSignal to race against.
 * @returns A promise mirroring `promise` or rejecting on abort.
 */
function raceWithSignal(promise, signal) {
  const abortReason = () => signal.reason ?? new DOMException("The operation was aborted.", "AbortError");
  if (signal.aborted) {
    return Promise.reject(abortReason());
  }
  return new Promise((resolve, reject) => {
    const handleAbort = () => reject(abortReason());
    const detach = () => signal.removeEventListener("abort", handleAbort);
    signal.addEventListener("abort", handleAbort, { once: true });
    promise.then(
      (value) => {
        detach();
        resolve(value);
      },
      (err) => {
        detach();
        reject(err);
      }
    );
  });
}
/**
 * Executes an agent against a dataset item and returns a trimmed result.
 *
 * Uses `agent.generate` when the agent's model passes
 * `isSupportedLanguageModel`, otherwise `agent.generateLegacy`. Built-in
 * scorers are disabled (`scorers: {}`) while scorer data is still requested
 * so experiment scorers can run afterwards. Only the `generate` path receives
 * the abort signal and `versions`.
 *
 * @param agent - Agent exposing `getModel`, `generate` and `generateLegacy`.
 * @param item - Dataset item; `item.input` is sent to the agent.
 * @param signal - Optional AbortSignal forwarded as `abortSignal`.
 * @param requestContext - Optional plain object converted into a RequestContext.
 * @param experimentId - Optional id attached as tracing metadata.
 * @param versions - Optional versions option forwarded to `generate`.
 * @returns `{ output, error: null, traceId, scorerInput, scorerOutput }`.
 */
async function executeAgent(agent, item, signal, requestContext, experimentId, versions) {
  const model = await agent.getModel();
  const prompt = item.input;
  const reqCtx = requestContext ? new RequestContext(Object.entries(requestContext)) : undefined;
  const tracingOptions = experimentId ? { metadata: { experimentId } } : undefined;

  // Options common to both generation paths, added only when present.
  const contextOptions = {};
  if (reqCtx) contextOptions.requestContext = reqCtx;
  if (tracingOptions) contextOptions.tracingOptions = tracingOptions;

  let result;
  if (isSupportedLanguageModel(model)) {
    const generateOptions = {
      scorers: {},
      returnScorerData: true,
      abortSignal: signal,
      ...contextOptions
    };
    if (versions) generateOptions.versions = versions;
    result = await agent.generate(prompt, generateOptions);
  } else {
    result = await agent.generateLegacy(prompt, {
      scorers: {},
      returnScorerData: true,
      ...contextOptions
    });
  }

  const traceId = result.traceId ?? null;
  const scoringData = result.scoringData;
  return {
    // Keep only the fields worth persisting from the full generation result.
    output: {
      text: result.text,
      object: result.object,
      toolCalls: result.toolCalls,
      toolResults: result.toolResults,
      sources: result.sources,
      files: result.files,
      usage: result.usage,
      reasoningText: result.reasoningText,
      traceId,
      error: result.error ?? null
    },
    error: null,
    traceId,
    scorerInput: scoringData?.input,
    scorerOutput: scoringData?.output
  };
}
/**
 * Executes a workflow against a dataset item and maps the run status onto the
 * experiment result shape.
 *
 * The run is created with scorers disabled. "success" yields the workflow
 * result as output; "failed", "tripwire", "suspended" and "paused" yield an
 * error record. All of those carry step results and the step execution path.
 * Any other status yields an error record without step data.
 *
 * @param workflow - Workflow exposing `createRun`.
 * @param item - Dataset item; `item.input` becomes `inputData`.
 * @param requestContext - Optional plain object converted into a RequestContext.
 * @returns `{ output, error, traceId, spanId, stepResults?, stepExecutionPath? }`.
 */
async function executeWorkflow(workflow, item, requestContext) {
  const reqCtx = requestContext ? new RequestContext(Object.entries(requestContext)) : undefined;
  const observabilityContext = resolveObservabilityContext({});
  const run = await workflow.createRun({ disableScorers: true });
  const result = await run.start({
    inputData: item.input,
    ...(reqCtx ? { requestContext: reqCtx } : {}),
    ...observabilityContext
  });

  const traceId = result.traceId ?? null;
  const spanId = result.spanId ?? null;

  let error;
  switch (result.status) {
    case "success":
      return {
        output: result.result,
        error: null,
        traceId,
        spanId,
        stepResults: result.steps,
        stepExecutionPath: result.stepExecutionPath
      };
    case "failed":
      error = { message: result.error?.message ?? "Workflow failed", stack: result.error?.stack };
      break;
    case "tripwire":
      error = { message: `Workflow tripwire: ${result.tripwire?.reason ?? "Unknown reason"}` };
      break;
    case "suspended":
      error = { message: "Workflow suspended - not yet supported in dataset experiments" };
      break;
    case "paused":
      error = { message: "Workflow paused - not yet supported in dataset experiments" };
      break;
    default:
      // Unrecognized status: report it, without step data.
      return {
        output: null,
        error: { message: `Workflow ended with unexpected status: ${result.status}` },
        traceId,
        spanId
      };
  }

  return {
    output: null,
    error,
    traceId,
    spanId,
    stepResults: result.steps,
    stepExecutionPath: result.stepExecutionPath
  };
}
// src/datasets/experiment/scorer.ts
/**
 * Maps an experiment target type onto the EntityType passed to scorers.
 *
 * @param targetType - "agent", "workflow" or "scorer".
 * @returns The matching EntityType member, or `undefined` for any other value.
 */
function toScorerTargetEntityType(targetType) {
  if (targetType === "agent") return EntityType.AGENT;
  if (targetType === "workflow") return EntityType.WORKFLOW_RUN;
  if (targetType === "scorer") return EntityType.SCORER;
  return undefined;
}
/**
 * Turns a mixed list of scorer ids and scorer instances into scorer instances.
 *
 * String entries are looked up with `mastra.getScorerById`; ids that cannot be
 * resolved are dropped with a warning. Non-string entries are passed through
 * unchanged (except `null`, which is dropped).
 *
 * @param mastra - Object exposing `getScorerById(id)`.
 * @param scorers - Optional array of scorer ids and/or scorer instances.
 * @returns Array of scorer instances, in input order.
 */
function resolveScorers(mastra, scorers) {
  if (!scorers) return [];
  if (scorers.length === 0) return [];
  return scorers.reduce((resolvedList, entry) => {
    if (typeof entry !== "string") {
      if (entry !== null) resolvedList.push(entry);
      return resolvedList;
    }
    const found = mastra.getScorerById(entry);
    if (found) {
      resolvedList.push(found);
    } else {
      console.warn(`Scorer not found: ${entry}`);
    }
    return resolvedList;
  }, []);
}
/**
 * Best-effort lookup of a trace in the observability store, converted to a
 * trajectory with `extractTrajectoryFromTrace`.
 *
 * Returns `undefined` when storage or trace id is missing, when no
 * observability store exists, when the trace has no spans, or when anything
 * along the way throws.
 *
 * @param storage - Storage exposing `getStore(name)`, or null/undefined.
 * @param traceId - Id of the trace to load.
 * @returns The extracted trajectory, or `undefined`.
 */
async function extractTrajectoryFromStorage(storage, traceId) {
  if (!(storage && traceId)) return undefined;
  try {
    const store = await storage.getStore("observability");
    if (!store) return undefined;
    const trace = await store.getTrace({ traceId });
    const spans = trace?.spans;
    return spans?.length ? extractTrajectoryFromTrace(spans) : undefined;
  } catch {
    // Deliberate best-effort: callers fall back to other trajectory sources.
    return undefined;
  }
}
/**
 * Runs every scorer against one executed item and, when storage is available,
 * persists each non-null score.
 *
 * Trajectory scorers receive a trajectory taken from the stored trace when one
 * can be loaded, otherwise one derived from `scorerOutput`, otherwise an empty
 * `{ steps: [] }`. Scorers run concurrently; a scorer that rejects is reported
 * as an error result instead of failing the whole item. Failures while saving
 * a score are only logged.
 *
 * @param scorers - Resolved scorer instances.
 * @param item - Dataset item (input, groundTruth, metadata).
 * @param output - Output produced by the target for this item.
 * @param storage - Storage used for trace lookup and score persistence, or null.
 * @param runId - Experiment id; stored as `runId` and `experimentId`.
 * @param targetType - Target kind; upper-cased into the stored `entityType`.
 * @param targetId - Target id; used as entity id and name.
 * @param itemId - Dataset item id; stored as the score's `entityId`.
 * @param scorerInput - Optional scorer-specific input from the execution.
 * @param scorerOutput - Optional scorer-specific output from the execution.
 * @param traceId - Optional trace id of the execution.
 * @param workflowData - Optional workflow step data forwarded to scorers.
 * @returns One result per scorer, in scorer order.
 */
async function runScorersForItem(scorers, item, output, storage, runId, targetType, targetId, itemId, scorerInput, scorerOutput, traceId, workflowData) {
  if (scorers.length === 0) return [];

  const scopeOf = (scorer) => (scorer.type === "trajectory" ? "trajectory" : "span");

  // Only pay for trajectory extraction when a trajectory scorer is present.
  let trajectoryOutput;
  if (scorers.some((candidate) => candidate.type === "trajectory")) {
    const fromTrace = await extractTrajectoryFromStorage(storage, traceId);
    trajectoryOutput = fromTrace ?? (scorerOutput ? extractTrajectory(scorerOutput) : { steps: [] });
  }

  const targetCorrelationContext = {
    ...(traceId ? { traceId } : {}),
    entityType: toScorerTargetEntityType(targetType),
    entityId: targetId,
    entityName: targetId,
    experimentId: runId
  };

  const scoreAndPersist = async (scorer) => {
    const { result, promptMetadata } = await runScorerSafe(
      scorer,
      item,
      output,
      scorerInput,
      scorerOutput,
      targetType,
      traceId,
      targetCorrelationContext,
      scorer.type === "trajectory" ? trajectoryOutput : undefined,
      workflowData
    );
    if (!storage || result.score === null) return result;
    try {
      await validateAndSaveScore(storage, {
        scorerId: scorer.id,
        score: result.score,
        reason: result.reason ?? undefined,
        input: item.input,
        output,
        additionalContext: item.metadata,
        entityType: targetType.toUpperCase(),
        entityId: itemId,
        source: "TEST",
        runId,
        traceId,
        scorer: {
          id: scorer.id,
          name: scorer.name,
          description: scorer.description ?? "",
          hasJudge: !!scorer.judge
        },
        entity: {
          id: targetId,
          name: targetId
        },
        ...promptMetadata
      });
    } catch (saveError) {
      console.warn(`Failed to save score for scorer ${scorer.id}:`, saveError);
    }
    return result;
  };

  const outcomes = await Promise.allSettled(scorers.map((scorer) => scoreAndPersist(scorer)));
  return outcomes.map((outcome, index) => {
    if (outcome.status === "fulfilled") return outcome.value;
    const failedScorer = scorers[index];
    return {
      scorerId: failedScorer.id,
      scorerName: failedScorer.name,
      score: null,
      reason: null,
      error: String(outcome.reason),
      targetScope: scopeOf(failedScorer)
    };
  });
}
/**
 * Runs one scorer and converts every outcome into a result record.
 * It never throws: scorer exceptions are returned in `result.error`.
 *
 * The scorer output is chosen as `trajectoryOutput ?? scorerOutput ?? output`.
 * When a trajectory is supplied the scope is "trajectory", otherwise "span".
 * Workflow step data is forwarded as `targetMetadata` only for span-scoped runs.
 *
 * Every returned `result` carries `targetScope`, including the
 * "scorer returned a non-object" branch, so all result records share one shape.
 *
 * @param scorer - Scorer exposing `id`, `name` and async `run(payload)`.
 * @param item - Dataset item (input, groundTruth).
 * @param output - Raw target output for the item.
 * @param scorerInput - Optional scorer-specific input; overrides `item.input`.
 * @param scorerOutput - Optional scorer-specific output; overrides `output`.
 * @param targetType - Target kind, mapped via `toScorerTargetEntityType`.
 * @param targetTraceId - Optional trace id of the execution.
 * @param targetCorrelationContext - Optional correlation data forwarded to the scorer.
 * @param trajectoryOutput - Optional trajectory; switches the scope to "trajectory".
 * @param workflowData - Optional `{ stepResults, stepExecutionPath, spanId }`.
 * @returns `{ result, promptMetadata }`; `promptMetadata` is `{}` on failure.
 */
async function runScorerSafe(scorer, item, output, scorerInput, scorerOutput, targetType, targetTraceId, targetCorrelationContext, trajectoryOutput, workflowData) {
  // Depends only on the arguments, so it is shared by the success,
  // invalid-result and error paths.
  const effectiveScope = trajectoryOutput ? "trajectory" : "span";
  try {
    const effectiveOutput = trajectoryOutput ?? scorerOutput ?? output;
    const targetMetadata = !trajectoryOutput && workflowData && (workflowData.stepResults || workflowData.stepExecutionPath) ? {
      ...(workflowData.stepResults ? { stepResults: workflowData.stepResults } : {}),
      ...(workflowData.stepExecutionPath ? { stepExecutionPath: workflowData.stepExecutionPath } : {})
    } : undefined;
    const scoreResult = await scorer.run({
      input: scorerInput ?? item.input,
      output: effectiveOutput,
      groundTruth: item.groundTruth,
      scoreSource: "experiment",
      targetScope: effectiveScope,
      targetEntityType: toScorerTargetEntityType(targetType),
      targetTraceId,
      ...(workflowData?.spanId ? { targetSpanId: workflowData.spanId } : {}),
      ...(targetCorrelationContext ? { targetCorrelationContext } : {}),
      ...(targetMetadata ? { targetMetadata } : {})
    });
    if (typeof scoreResult !== "object" || scoreResult === null) {
      return {
        result: {
          scorerId: scorer.id,
          scorerName: scorer.name,
          score: null,
          reason: null,
          error: `Scorer ${scorer.name} (${scorer.id}) returned invalid result: expected object, got ${scoreResult === null ? "null" : typeof scoreResult} (${String(scoreResult)})`,
          // Previously omitted on this branch only; every other path sets it.
          targetScope: effectiveScope
        },
        promptMetadata: {}
      };
    }
    const fields = scoreResult;
    const score = typeof fields.score === "number" ? fields.score : null;
    const reason = typeof fields.reason === "string" ? fields.reason : null;
    // Typed accessors: anything of the wrong type is reported as undefined.
    const str = (key) => typeof fields[key] === "string" ? fields[key] : undefined;
    const obj = (key) => {
      const val = fields[key];
      return typeof val === "object" && val !== null ? val : undefined;
    };
    return {
      result: {
        scorerId: scorer.id,
        scorerName: scorer.name,
        score,
        reason,
        error: null,
        targetScope: effectiveScope
      },
      promptMetadata: {
        generateScorePrompt: str("generateScorePrompt"),
        generateReasonPrompt: str("generateReasonPrompt"),
        preprocessStepResult: obj("preprocessStepResult"),
        preprocessPrompt: str("preprocessPrompt"),
        analyzeStepResult: obj("analyzeStepResult"),
        analyzePrompt: str("analyzePrompt")
      }
    };
  } catch (error) {
    return {
      result: {
        scorerId: scorer.id,
        scorerName: scorer.name,
        score: null,
        reason: null,
        error: error instanceof Error ? error.message : String(error),
        targetScope: effectiveScope
      },
      promptMetadata: {}
    };
  }
}
/**
 * Resolves the per-step scorer configuration into scorer instances.
 *
 * Each step's list goes through `resolveScorers`; steps whose list resolves
 * to nothing are left out of the result.
 *
 * @param mastra - Mastra instance used for scorer lookup.
 * @param stepsConfig - Optional map of step id to scorer ids/instances.
 * @returns Map of step id to a non-empty array of scorer instances.
 */
function resolveStepScorers(mastra, stepsConfig) {
  if (!stepsConfig) return {};
  return Object.entries(stepsConfig).reduce((byStep, [stepId, configured]) => {
    const usable = resolveScorers(mastra, configured);
    if (usable.length > 0) {
      byStep[stepId] = usable;
    }
    return byStep;
  }, {});
}
/**
 * Runs step-level scorers against the recorded results of individual
 * workflow steps for one executed item.
 *
 * Steps are processed one after another; the scorers of a single step run
 * concurrently. A step that is missing, did not succeed, or has no output
 * produces an error result for each of its scorers. Scores are persisted when
 * storage is available and the score is not null; save failures are logged.
 *
 * @param stepScorers - Map of step id to scorer instances.
 * @param item - Dataset item (input, groundTruth, metadata).
 * @param workflowData - Optional workflow data; `stepResults` is read.
 * @param storage - Storage used for persistence, or null.
 * @param runId - Experiment id.
 * @param targetType - Target kind (not used by this function).
 * @param targetId - Workflow id stored as the score's entity.
 * @param itemId - Dataset item id stored as `entityId`.
 * @param traceId - Optional trace id of the workflow run.
 * @returns Flat list of results, each tagged with `stepId`.
 */
async function runStepScorersForItem(stepScorers, item, workflowData, storage, runId, targetType, targetId, itemId, traceId) {
  const stepIds = Object.keys(stepScorers);
  if (stepIds.length === 0) return [];

  // Single place that fixes the shape (and key order) of a step score record.
  const buildResult = (scorer, stepId, score, reason, error) => ({
    scorerId: scorer.id,
    scorerName: scorer.name,
    score,
    reason,
    error,
    targetScope: "span",
    stepId
  });

  const collected = [];
  const recordedSteps = workflowData?.stepResults;

  for (const stepId of stepIds) {
    const scorersForStep = stepScorers[stepId];
    const stepResult = recordedSteps?.[stepId];
    const hasUsableOutput = stepResult && stepResult.status === "success" && stepResult.output !== undefined;

    if (!hasUsableOutput) {
      for (const scorer of scorersForStep) {
        collected.push(
          buildResult(
            scorer,
            stepId,
            null,
            null,
            `Step "${stepId}" did not produce a successful output (status: ${stepResult?.status ?? "missing"})`
          )
        );
      }
      continue;
    }

    let stepInput = item.input;
    if (stepResult.payload !== undefined) {
      stepInput = stepResult.payload;
    }
    const stepOutput = stepResult.output;
    const targetCorrelationContext = {
      ...(traceId ? { traceId } : {}),
      entityType: EntityType.WORKFLOW_STEP,
      entityId: stepId,
      entityName: stepId,
      experimentId: runId
    };

    const scoreStep = async (scorer) => {
      try {
        const scoreResult = await scorer.run({
          input: stepInput,
          output: stepOutput,
          groundTruth: item.groundTruth,
          scoreSource: "experiment",
          targetScope: "span",
          targetEntityType: EntityType.WORKFLOW_STEP,
          targetTraceId: traceId,
          targetCorrelationContext
        });
        if (typeof scoreResult !== "object" || scoreResult === null) {
          return buildResult(
            scorer,
            stepId,
            null,
            null,
            `Scorer ${scorer.name} (${scorer.id}) returned invalid result on step ${stepId}`
          );
        }
        const score = typeof scoreResult.score === "number" ? scoreResult.score : null;
        const reason = typeof scoreResult.reason === "string" ? scoreResult.reason : null;
        if (storage && score !== null) {
          try {
            await validateAndSaveScore(storage, {
              scorerId: scorer.id,
              score,
              reason: reason ?? undefined,
              input: stepInput,
              output: stepOutput,
              additionalContext: { ...item.metadata, stepId },
              entityType: "WORKFLOW_STEP",
              entityId: itemId,
              source: "TEST",
              runId,
              traceId,
              scorer: {
                id: scorer.id,
                name: scorer.name,
                description: scorer.description ?? "",
                hasJudge: !!scorer.judge
              },
              entity: {
                id: targetId,
                name: targetId
              }
            });
          } catch (saveError) {
            console.warn(`Failed to save score for step scorer ${scorer.id} on ${stepId}:`, saveError);
          }
        }
        return buildResult(scorer, stepId, score, reason, null);
      } catch (failure) {
        return buildResult(
          scorer,
          stepId,
          null,
          null,
          failure instanceof Error ? failure.message : String(failure)
        );
      }
    };

    const outcomes = await Promise.allSettled(scorersForStep.map((scorer) => scoreStep(scorer)));
    outcomes.forEach((outcome, index) => {
      collected.push(
        outcome.status === "fulfilled"
          ? outcome.value
          : buildResult(scorersForStep[index], stepId, null, null, String(outcome.reason))
      );
    });
  }

  return collected;
}
// src/datasets/experiment/analytics/aggregate.ts
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param values - Numbers to average.
 * @returns The mean, or 0 for an empty list.
 */
function computeMean(values) {
  const count = values.length;
  if (count === 0) return 0;
  let total = 0;
  for (let i = 0; i < count; i++) {
    total += values[i];
  }
  return total / count;
}
/**
 * Aggregates the score records of one scorer into summary statistics.
 *
 * A record whose `score` is null or undefined counts as an error. The error
 * rate is relative to all records; the pass rate and average are relative to
 * records with a score. A score passes when it is `>= passThreshold`.
 *
 * @param scores - Score records; only the `score` field is read.
 * @param passThreshold - Minimum score that counts as a pass (default 0.5).
 * @returns `{ errorRate, errorCount, passRate, passCount, avgScore, scoreCount, totalItems }`.
 */
function computeScorerStats(scores, passThreshold = 0.5) {
  const totalItems = scores.length;
  if (totalItems === 0) {
    return {
      errorRate: 0,
      errorCount: 0,
      passRate: 0,
      passCount: 0,
      avgScore: 0,
      scoreCount: 0,
      totalItems: 0
    };
  }

  const usable = [];
  let errorCount = 0;
  let passCount = 0;
  for (let i = 0; i < totalItems; i++) {
    const value = scores[i].score;
    if (value === null || value === undefined) {
      errorCount += 1;
      continue;
    }
    usable.push(value);
    if (value >= passThreshold) {
      passCount += 1;
    }
  }

  const scoreCount = usable.length;
  return {
    errorRate: errorCount / totalItems,
    errorCount,
    passRate: scoreCount > 0 ? passCount / scoreCount : 0,
    passCount,
    avgScore: computeMean(usable),
    scoreCount,
    totalItems
  };
}
/**
 * Decides whether a score delta (B minus A) is a regression.
 *
 * For "higher-is-better" a drop of more than `threshold` is a regression;
 * for any other direction a rise of more than `threshold` is.
 *
 * @param delta - Difference between the compared averages.
 * @param threshold - Tolerated change before flagging a regression.
 * @param direction - "higher-is-better" (default) or "lower-is-better".
 * @returns `true` when the delta exceeds the tolerated change in the bad direction.
 */
function isRegression(delta, threshold, direction = "higher-is-better") {
  return direction === "higher-is-better" ? delta < -threshold : delta > threshold;
}
// src/datasets/experiment/analytics/compare.ts
// Regression threshold that compareExperiments falls back to when the caller
// supplies no entry for a scorer in `config.thresholds`: a tolerance of 0 with
// higher scores treated as better.
var DEFAULT_THRESHOLD = {
value: 0,
direction: "higher-is-better"
};
// Pass threshold that compareExperiments hands to computeScorerStats for both
// experiments being compared.
var DEFAULT_PASS_THRESHOLD = 0.5;
/**
 * Compares two stored experiments scorer-by-scorer and item-by-item.
 *
 * Loads both experiments, their results and their scores, computes aggregate
 * statistics per scorer, flags regressions of experiment B relative to A
 * using the per-scorer thresholds (or the default), and lists every item
 * with its scores in both experiments. Non-fatal oddities (dataset version
 * mismatch, empty experiments, no overlapping items) are reported as warnings.
 *
 * @param mastra - Mastra instance exposing `getStorage()`.
 * @param config - `{ experimentIdA, experimentIdB, thresholds? }`.
 * @returns Comparison result with `scorers`, `items`, `hasRegression`, `warnings`.
 * @throws Error when storage, a required store, or an experiment is missing.
 */
async function compareExperiments(mastra, config) {
  const { experimentIdA, experimentIdB, thresholds = {} } = config;
  const warnings = [];

  const storage = mastra.getStorage();
  if (!storage) {
    throw new Error("Storage not configured. Configure storage in Mastra instance.");
  }
  const experimentsStore = await storage.getStore("experiments");
  const scoresStore = await storage.getStore("scores");
  if (!experimentsStore) {
    throw new Error("ExperimentsStorage not configured.");
  }
  if (!scoresStore) {
    throw new Error("ScoresStorage not configured.");
  }

  const [experimentA, experimentB] = await Promise.all(
    [experimentIdA, experimentIdB].map((id) => experimentsStore.getExperimentById({ id }))
  );
  if (!experimentA) {
    throw new Error(`Experiment not found: ${experimentIdA}`);
  }
  if (!experimentB) {
    throw new Error(`Experiment not found: ${experimentIdB}`);
  }

  const versionMismatch = experimentA.datasetVersion !== experimentB.datasetVersion;
  if (versionMismatch) {
    warnings.push(
      `Experiments have different dataset versions: ${experimentA.datasetVersion} vs ${experimentB.datasetVersion}`
    );
  }

  // `perPage: false` requests every row in a single page.
  const unpaginated = () => ({ page: 0, perPage: false });
  const [resultsA, resultsB] = await Promise.all([
    experimentsStore.listExperimentResults({ experimentId: experimentIdA, pagination: unpaginated() }),
    experimentsStore.listExperimentResults({ experimentId: experimentIdB, pagination: unpaginated() })
  ]);
  const [scoresA, scoresB] = await Promise.all([
    scoresStore.listScoresByRunId({ runId: experimentIdA, pagination: unpaginated() }),
    scoresStore.listScoresByRunId({ runId: experimentIdB, pagination: unpaginated() })
  ]);

  const isEmptyA = resultsA.results.length === 0;
  const isEmptyB = resultsB.results.length === 0;
  if (isEmptyA && isEmptyB) {
    warnings.push("Both experiments have no results.");
    return buildEmptyResult(experimentA, experimentB, versionMismatch, warnings);
  }
  if (isEmptyA) {
    warnings.push("Experiment A has no results.");
  }
  if (isEmptyB) {
    warnings.push("Experiment B has no results.");
  }

  const itemIdsA = new Set(resultsA.results.map((row) => row.itemId));
  const itemIdsB = new Set(resultsB.results.map((row) => row.itemId));
  const hasOverlap = [...itemIdsA].some((id) => itemIdsB.has(id));
  if (!hasOverlap) {
    warnings.push("No overlapping items between experiments.");
  }

  const scoresMapA = groupScoresByScorerAndItem(scoresA.scores);
  const scoresMapB = groupScoresByScorerAndItem(scoresB.scores);
  const allScorerIds = new Set([...Object.keys(scoresMapA), ...Object.keys(scoresMapB)]);

  // Per-scorer aggregate comparison.
  const scorers = {};
  let hasRegression = false;
  for (const scorerId of allScorerIds) {
    const thresholdConfig = thresholds[scorerId] ?? DEFAULT_THRESHOLD;
    const threshold = thresholdConfig.value;
    const direction = thresholdConfig.direction ?? "higher-is-better";
    const statsA = computeScorerStats(Object.values(scoresMapA[scorerId] ?? {}), DEFAULT_PASS_THRESHOLD);
    const statsB = computeScorerStats(Object.values(scoresMapB[scorerId] ?? {}), DEFAULT_PASS_THRESHOLD);
    const delta = statsB.avgScore - statsA.avgScore;
    const regressed = isRegression(delta, threshold, direction);
    if (regressed) {
      hasRegression = true;
    }
    scorers[scorerId] = { statsA, statsB, delta, regressed, threshold };
  }

  // Per-item score table across the union of items from both experiments.
  const allItemIds = new Set([...itemIdsA, ...itemIdsB]);
  const items = Array.from(allItemIds, (itemId) => {
    const perScorerA = {};
    const perScorerB = {};
    for (const scorerId of allScorerIds) {
      perScorerA[scorerId] = scoresMapA[scorerId]?.[itemId]?.score ?? null;
      perScorerB[scorerId] = scoresMapB[scorerId]?.[itemId]?.score ?? null;
    }
    return {
      itemId,
      inBothExperiments: itemIdsA.has(itemId) && itemIdsB.has(itemId),
      scoresA: perScorerA,
      scoresB: perScorerB
    };
  });

  const summarize = (experiment) => ({
    id: experiment.id,
    datasetVersion: experiment.datasetVersion
  });
  return {
    experimentA: summarize(experimentA),
    experimentB: summarize(experimentB),
    versionMismatch,
    hasRegression,
    scorers,
    items,
    warnings
  };
}
/**
 * Indexes score records as `result[scorerId][entityId] = score`.
 * When several records share both ids, the last one wins.
 *
 * Both levels are null-prototype objects. Scorer and entity ids come from
 * stored data, and on a plain `{}` an id such as "__proto__" or "constructor"
 * would resolve to an inherited member. The write would then land on
 * `Object.prototype` or the `Object` constructor instead of the map.
 * `Object.keys`, `Object.values` and bracket lookups work unchanged.
 *
 * @param scores - Score records exposing `scorerId` and `entityId`.
 * @returns Two-level lookup keyed by scorer id, then by item (entity) id.
 */
function groupScoresByScorerAndItem(scores) {
  const result = Object.create(null);
  for (const score of scores) {
    const { scorerId, entityId: itemId } = score;
    if (!Object.hasOwn(result, scorerId)) {
      result[scorerId] = Object.create(null);
    }
    result[scorerId][itemId] = score;
  }
  return result;
}
/**
 * Builds the comparison result returned when neither experiment has results:
 * no scorers, no items, and no regression.
 *
 * @param experimentA - First experiment record (`id`, `datasetVersion` are read).
 * @param experimentB - Second experiment record (`id`, `datasetVersion` are read).
 * @param versionMismatch - Whether the two dataset versions differ.
 * @param warnings - Warnings collected so far; returned as-is.
 * @returns Empty comparison result.
 */
function buildEmptyResult(experimentA, experimentB, versionMismatch, warnings) {
  const summarize = ({ id, datasetVersion }) => ({ id, datasetVersion });
  return {
    experimentA: summarize(experimentA),
    experimentB: summarize(experimentB),
    versionMismatch,
    hasRegression: false,
    scorers: {},
    items: [],
    warnings
  };
}
// src/datasets/experiment/index.ts
/**
 * Runs an experiment: executes an inline task or a registered target against
 * every item of a data source, scores each output, and records progress and
 * results in the experiments store when one is available.
 *
 * Phases, in order:
 *   1. Load items from `config.data` (array or factory) or from the dataset
 *      identified by `datasetId` at `version` (default: the dataset's version).
 *   2. Build the per-item execution function from `config.task` or from
 *      `targetType` + `targetId`.
 *   3. Merge and resolve scorers (config scorers plus the dataset's scorerIds).
 *   4. Create the experiment record (unless an id was provided) and mark it running.
 *   5. Execute items through p-map with `maxConcurrency`, with optional
 *      per-item timeout and retries, scoring and persisting each result.
 *   6. Write the final status and counters and return the summary.
 *
 * @param mastra - Mastra instance; `getStorage()` and `getLogger()` are read here.
 * @param config - Experiment options; see the destructuring below for the fields read.
 * @returns Summary with `experimentId`, `status`, item counters, timestamps and `results`.
 * @throws Setup errors from phases 1 and 2 (no data source, dataset not found,
 *   dataset without items, no task, target not found) are rethrown.
 */
async function runExperiment(mastra, config) {
const {
datasetId,
targetType,
targetId,
scorers: scorerInput,
version,
maxConcurrency = 5,
signal,
itemTimeout,
maxRetries = 0,
experimentId: providedExperimentId,
name,
description,
metadata,
requestContext: globalRequestContext,
agentVersion,
versions
} = config;
const startedAt = /* @__PURE__ */ new Date();
const experimentId = providedExperimentId ?? crypto.randomUUID();
const storage = mastra.getStorage();
// Storage is optional here: without it both stores are undefined and all
// persistence below is skipped.
const datasetsStore = await storage?.getStore("datasets");
const experimentsStore = await storage?.getStore("experiments");
// Marks a caller-provided experiment record as failed (best effort), then
// ALWAYS rethrows `err`. Experiments whose id was generated here have no
// record yet at setup time, so there is nothing to update for them.
const markFailedOnSetupError = async (err) => {
if (providedExperimentId && experimentsStore) {
try {
await experimentsStore.updateExperiment({
id: experimentId,
status: "failed",
completedAt: /* @__PURE__ */ new Date()
});
} catch (updateErr) {
mastra.getLogger()?.error(`Failed to mark experiment ${experimentId} as failed: ${updateErr}`);
}
}
throw err;
};
let items;
let datasetVersion;
let datasetRecord;
// Phase 1: resolve the items to run.
try {
if (config.data) {
// Inline data: either an array or a (possibly async) factory returning one.
const rawData = typeof config.data === "function" ? await config.data() : config.data;
items = rawData.map((dataItem) => {
const id = dataItem.id ?? crypto.randomUUID();
return {
id,
datasetVersion: null,
input: dataItem.input,
groundTruth: dataItem.groundTruth,
metadata: dataItem.metadata
};
});
datasetVersion = null;
} else if (datasetId) {
if (!datasetsStore) {
throw new Error("DatasetsStorage not configured. Configure storage in Mastra instance.");
}
datasetRecord = await datasetsStore.getDatasetById({ id: datasetId });
if (!datasetRecord) {
throw new MastraError({
id: "DATASET_NOT_FOUND",
text: `Dataset not found: ${datasetId}`,
domain: "STORAGE",
category: "USER"
});
}
datasetVersion = version ?? datasetRecord.version;
const versionItems = await datasetsStore.getItemsByVersion({
datasetId,
version: datasetVersion
});
if (versionItems.length === 0) {
throw new MastraError({
id: "EXPERIMENT_NO_ITEMS",
text: `No items in dataset ${datasetId} at version ${datasetVersion}`,
domain: "STORAGE",
category: "USER"
});
}
items = versionItems.map((v) => ({
id: v.id,
datasetVersion: v.datasetVersion,
input: v.input,
groundTruth: v.groundTruth,
requestContext: v.requestContext,
metadata: v.metadata
}));
} else {
throw new Error("No data source: provide datasetId or data");
}
} catch (err) {
await markFailedOnSetupError(err);
// NOTE(review): unreachable in practice, since markFailedOnSetupError always rethrows.
throw err;
}
// Phase 2: build the function that executes one item.
let execFn;
try {
if (config.task) {
const taskFn = config.task;
// Inline task: errors are converted into the result shape rather than thrown.
execFn = async (item, itemSignal) => {
try {
const result = await taskFn({
input: item.input,
mastra,
groundTruth: item.groundTruth,
metadata: item.metadata,
signal: itemSignal
});
return { output: result, error: null, traceId: null };
} catch (err) {
return {
output: null,
error: {
message: err instanceof Error ? err.message : String(err),
stack: err instanceof Error ? err.stack : void 0
},
traceId: null
};
}
};
} else if (targetType && targetId) {
const resolved = await resolveTarget(mastra, targetType, targetId, agentVersion);
if (!resolved) {
throw new Error(`Target not found: ${targetType}/${targetId}`);
}
const { target } = resolved;
execFn = (item, itemSignal) => {
// Item-level request context overrides the experiment-wide one key by key.
const mergedRequestContext = globalRequestContext || item.requestContext ? { ...globalRequestContext, ...item.requestContext } : void 0;
return executeTarget(target, targetType, item, {
signal: itemSignal,
requestContext: mergedRequestContext,
experimentId,
versions
});
};
} else {
throw new Error("No task: provide targetType+targetId or task");
}
} catch (err) {
await markFailedOnSetupError(err);
// NOTE(review): unreachable in practice, since markFailedOnSetupError always rethrows.
throw err;
}
// Phase 3: normalize the scorer configuration. `scorers` may be a flat array
// or an object with `agent`, `workflow`, `trajectory` and `steps` groups;
// `steps` is kept aside for per-step scoring, the rest is flattened.
let stepsConfigInput;
const flatScorerInput = (() => {
if (!scorerInput) return void 0;
if (Array.isArray(scorerInput)) return scorerInput;
const flat = [];
if ("agent" in scorerInput && scorerInput.agent) flat.push(...scorerInput.agent);
if ("workflow" in scorerInput && scorerInput.workflow) flat.push(...scorerInput.workflow);
if ("trajectory" in scorerInput && scorerInput.trajectory) flat.push(...scorerInput.trajectory);
if ("steps" in scorerInput && scorerInput.steps) {
stepsConfigInput = scorerInput.steps;
}
return flat;
})();
let mergedScorerInput = flatScorerInput;
// Scorer ids attached to the dataset record are appended to the configured ones.
const datasetScorerIds = datasetRecord?.scorerIds ?? [];
if (datasetScorerIds.length > 0) {
mergedScorerInput = [...flatScorerInput ?? [], ...datasetScorerIds];
}
if (mergedScorerInput && mergedScorerInput.length > 0) {
// De-duplicate string ids only; scorer instances are always kept.
const seen = /* @__PURE__ */ new Set();
mergedScorerInput = mergedScorerInput.filter((entry) => {
if (typeof entry === "string") {
if (seen.has(entry)) return false;
seen.add(entry);
return true;
}
return true;
});
}
const scorers = resolveScorers(mastra, mergedScorerInput);
const stepScorers = resolveStepScorers(mastra, stepsConfigInput);
// Phase 4: create the record when the caller did not supply an id, then mark it running.
if (experimentsStore) {
if (!providedExperimentId) {
await experimentsStore.createExperiment({
id: experimentId,
name,
description,
metadata,
datasetId: datasetId ?? null,
datasetVersion,
targetType: targetType ?? "agent",
targetId: targetId ?? "inline",
totalItems: items.length,
agentVersion
});
}
await experimentsStore.updateExperiment({
id: experimentId,
status: "running",
totalItems: items.length,
startedAt
});
}
let succeededCount = 0;
let failedCount = 0;
// Pre-sized so results land at their item index regardless of completion order.
const results = new Array(items.length);
// Minimum gap, in milliseconds, between progress writes to the experiments store.
const PROGRESS_UPDATE_INTERVAL = 2e3;
let lastProgressUpdate = 0;
// Phase 5: execute, score and persist every item.
try {
const pMap = (await import('p-map')).default;
await pMap(
items.map((item, idx) => ({ item, idx })),
async ({ item, idx }) => {
// Throwing here rejects pMap and lands in the catch block below.
if (signal?.aborted) {
throw new DOMException("Aborted", "AbortError");
}
const itemStartedAt = /* @__PURE__ */ new Date();
let itemSignal = signal;
if (itemTimeout) {
// The timeout signal is created once per item and reused by every retry.
const timeoutSignal = AbortSignal.timeout(itemTimeout);
itemSignal = signal ? AbortSignal.any([signal, timeoutSignal]) : timeoutSignal;
}
let retryCount = 0;
let execResult = await execFn(item, itemSignal);
while (execResult.error && retryCount < maxRetries) {
// Errors whose message mentions "abort" are not retried.
if (execResult.error.message.toLowerCase().includes("abort")) break;
retryCount++;
// Exponential backoff from 1s, capped at 30s, plus up to 20% random jitter.
const delay = Math.min(1e3 * Math.pow(2, retryCount - 1), 3e4);
const jitter = delay * 0.2 * Math.random();
await new Promise((r) => setTimeout(r, delay + jitter));
if (signal?.aborted) {
throw new DOMException("Aborted", "AbortError");
}
execResult = await execFn(item, itemSignal);
}
const itemCompletedAt = /* @__PURE__ */ new Date();
if (execResult.error) {
failedCount++;
} else {
succeededCount++;
}
const itemResult = {
itemId: item.id,
itemVersion: item.datasetVersion ?? 0,
input: item.input,
output: execResult.output,
groundTruth: item.groundTruth ?? null,
error: execResult.error,
startedAt: itemStartedAt,
completedAt: itemCompletedAt,
retryCount
};
// Step data is only present when the execution result carries it.
const workflowData = execResult.stepResults || execResult.stepExecutionPath ? {
stepResults: execResult.stepResults,
stepExecutionPath: execResult.stepExecutionPath,
spanId: execResult.spanId
} : void 0;
const flatScores = await runScorersForItem(
scorers,
item,
execResult.output,
storage ?? null,
experimentId,
targetType ?? "agent",
targetId ?? "inline",
item.id,
execResult.scorerInput,
execResult.scorerOutput,
execResult.traceId ?? void 0,
workflowData
);
const stepScores = await runStepScorersForItem(
stepScorers,
item,
workflowData,
storage ?? null,
experimentId,
targetType ?? "agent",
targetId ?? "inline",
item.id,
execResult.traceId ?? void 0
);
const itemScores = [...flatScores, ...stepScores];
if (experimentsStore) {
// Persistence failures are logged and do not fail the item.
try {
await experimentsStore.addExperimentResult({
experimentId,
itemId: item.id,
itemDatasetVersion: item.datasetVersion,
input: item.input,
output: execResult.output,
groundTruth: item.groundTruth ?? null,
error: execResult.error,
startedAt: itemStartedAt,
completedAt: itemCompletedAt,
retryCount,
traceId: execResult.traceId
});
} catch (persistError) {
console.warn(`Failed to persist result for item ${item.id}:`, persistError);
}
const now = Date.now();
if (now - lastProgressUpdate >= PROGRESS_UPDATE_INTERVAL) {
lastProgressUpdate = now;
// Progress writes are best effort; the final update below carries the
// authoritative counters.
try {
await experimentsStore.updateExperiment({
id: experimentId,
succeededCount,
failedCount
});
} catch {
}
}
}
results[idx] = {
...itemResult,
scores: itemScores
};
},
{ concurrency: maxConcurrency }
);
} catch {
// Reached when pMap rejects (abort, or any error thrown by the mapper).
// NOTE(review): the caught error is discarded without logging, so the cause
// of a "failed" experiment is not visible to the caller; consider logging it.
const completedAt2 = /* @__PURE__ */ new Date();
const skippedCount2 = items.length - succeededCount - failedCount;
if (experimentsStore) {
await experimentsStore.updateExperiment({
id: experimentId,
status: "failed",
succeededCount,
failedCount,
skippedCount: skippedCount2,
completedAt: completedAt2
});
}
return {
experimentId,
status: "failed",
totalItems: items.length,
succeededCount,
failedCount,
skippedCount: skippedCount2,
completedWithErrors: false,
startedAt,
completedAt: completedAt2,
// Drop the slots of items that never completed.
results: results.filter(Boolean)
};
}
// Phase 6: finalize. The run counts as failed only when every item failed.
const completedAt = /* @__PURE__ */ new Date();
const status = failedCount === items.length ? "failed" : "completed";
const completedWithErrors = status === "completed" && failedCount > 0;
const skippedCount = items.length - succeededCount - failedCount;
if (experimentsStore) {
await experimentsStore.updateExperiment({
id: experimentId,
status,
succeededCount,
failedCount,
skippedCount,
completedAt
});
}
return {
experimentId,
status,
totalItems: items.length,
succeededCount,
failedCount,
skippedCount,
completedWithErrors,
startedAt,
completedAt,
results
};
}
/**
 * Looks up the experiment target on the Mastra instance.
 *
 * Agents and workflows are first looked up with the `...ById` accessor and,
 * if that throws, with the plain accessor. Scorers are looked up by id only.
 * When `agentVersion` is given, the agent accessors receive
 * `{ versionId: agentVersion }` and their result is awaited. Lookup errors
 * are swallowed on purpose: "not found" is reported as `null`.
 *
 * @param mastra - Mastra instance exposing the accessor methods.
 * @param targetType - "agent", "workflow" or "scorer"; anything else yields `null`.
 * @param targetId - Id (or name/key for the fallback accessors) of the target.
 * @param agentVersion - Optional agent version id.
 * @returns `{ target }` when something truthy was found, otherwise `null`.
 */
async function resolveTarget(mastra, targetType, targetId, agentVersion) {
  let found = null;

  if (targetType === "agent") {
    try {
      found = agentVersion
        ? await mastra.getAgentById(targetId, { versionId: agentVersion })
        : mastra.getAgentById(targetId);
    } catch {
      try {
        found = agentVersion
          ? await mastra.getAgent(targetId, { versionId: agentVersion })
          : mastra.getAgent(targetId);
      } catch {
        // Neither accessor knows this agent; fall through to `null`.
      }
    }
  } else if (targetType === "workflow") {
    try {
      found = mastra.getWorkflowById(targetId);
    } catch {
      try {
        found = mastra.getWorkflow(targetId);
      } catch {
        // Neither accessor knows this workflow; fall through to `null`.
      }
    }
  } else if (targetType === "scorer") {
    try {
      found = mastra.getScorerById(targetId) ?? null;
    } catch {
      // Unknown scorer; fall through to `null`.
    }
  }

  if (!found) return null;
  return { target: found };
}
/**
 * Handle for a single dataset, identified by `id`.
 *
 * Every method resolves the required storage domain lazily from the Mastra
 * instance (and caches it on the handle) before delegating to the store.
 */
var Dataset = class {
  id;
  #mastra;
  #datasetsStore;
  #experimentsStore;
  /**
   * @param id - Identifier of the dataset this handle operates on.
   * @param mastra - Mastra instance used to reach storage, the logger and the experiment runner.
   */
  constructor(id, mastra) {
    this.id = id;
    this.#mastra = mastra;
  }
  // ---------------------------------------------------------------------------
  // Lazy storage resolution
  // ---------------------------------------------------------------------------
  /**
   * Return the storage configured on the Mastra instance.
   * @throws {MastraError} DATASETS_STORAGE_NOT_CONFIGURED when no storage is configured.
   */
  #requireStorage() {
    const storage = this.#mastra.getStorage();
    if (!storage) {
      throw new MastraError({
        id: "DATASETS_STORAGE_NOT_CONFIGURED",
        text: "Storage not configured. Configure storage in Mastra instance.",
        domain: "STORAGE",
        category: "USER"
      });
    }
    return storage;
  }
  /**
   * Resolve (and cache) the "datasets" storage domain.
   * @throws {MastraError} DATASETS_STORE_NOT_AVAILABLE when the storage has no datasets store.
   */
  async #getDatasetsStore() {
    if (this.#datasetsStore) return this.#datasetsStore;
    const store = await this.#requireStorage().getStore("datasets");
    if (!store) {
      throw new MastraError({
        id: "DATASETS_STORE_NOT_AVAILABLE",
        text: "Datasets store not available. Ensure your storage adapter provides a datasets domain.",
        domain: "STORAGE",
        category: "USER"
      });
    }
    this.#datasetsStore = store;
    return store;
  }
  /**
   * Resolve (and cache) the "experiments" storage domain.
   * @throws {MastraError} EXPERIMENTS_STORE_NOT_AVAILABLE when the storage has no experiments store.
   */
  async #getExperimentsStore() {
    if (this.#experimentsStore) return this.#experimentsStore;
    const store = await this.#requireStorage().getStore("experiments");
    if (!store) {
      throw new MastraError({
        id: "EXPERIMENTS_STORE_NOT_AVAILABLE",
        text: "Experiments store not available. Ensure your storage adapter provides an experiments domain.",
        domain: "STORAGE",
        category: "USER"
      });
    }
    this.#experimentsStore = store;
    return store;
  }
  // ---------------------------------------------------------------------------
  // Dataset metadata
  // ---------------------------------------------------------------------------
  /**
   * Get the full dataset record from storage.
   * @throws {MastraError} DATASET_NOT_FOUND when the store has no record for this id.
   */
  async getDetails() {
    const store = await this.#getDatasetsStore();
    const record = await store.getDatasetById({ id: this.id });
    if (!record) {
      throw new MastraError({
        id: "DATASET_NOT_FOUND",
        text: `Dataset not found: ${this.id}`,
        domain: "STORAGE",
        category: "USER"
      });
    }
    return record;
  }
  /**
   * Update dataset metadata and/or schemas.
   * Zod schemas are automatically converted to JSON Schema; `null` and
   * `undefined` schemas are forwarded to the store untouched.
   */
  async update(input) {
    const store = await this.#getDatasetsStore();
    const { inputSchema, groundTruthSchema, ...rest } = input;
    const toJsonSchema = (schema) =>
      schema !== undefined && schema !== null && isZodType(schema) ? zodToJsonSchema(schema) : schema;
    return store.updateDataset({
      id: this.id,
      ...rest,
      inputSchema: toJsonSchema(inputSchema),
      groundTruthSchema: toJsonSchema(groundTruthSchema)
    });
  }
  // ---------------------------------------------------------------------------
  // Item CRUD
  // ---------------------------------------------------------------------------
  /**
   * Add a single item to the dataset.
   */
  async addItem(input) {
    const store = await this.#getDatasetsStore();
    return store.addItem({
      datasetId: this.id,
      input: input.input,
      groundTruth: input.groundTruth,
      expectedTrajectory: input.expectedTrajectory,
      requestContext: input.requestContext,
      metadata: input.metadata,
      source: input.source
    });
  }
  /**
   * Add multiple items to the dataset in bulk.
   */
  async addItems(input) {
    const store = await this.#getDatasetsStore();
    return store.batchInsertItems({
      datasetId: this.id,
      items: input.items
    });
  }
  /**
   * Get a single item by ID, optionally at a specific version.
   */
  async getItem(args) {
    const store = await this.#getDatasetsStore();
    return store.getItemById({ id: args.itemId, datasetVersion: args.version });
  }
  /**
   * List items in the dataset, optionally at a specific version.
   * Without a version the listing is paginated (defaults: page 0, 20 per page).
   */
  async listItems(args) {
    const store = await this.#getDatasetsStore();
    // NOTE(review): this is a truthiness check, so a falsy version (e.g. 0)
    // falls through to the paginated listing — confirm whether 0 is a valid version.
    if (args?.version) {
      return store.getItemsByVersion({ datasetId: this.id, version: args.version });
    }
    return store.listItems({
      datasetId: this.id,
      search: args?.search,
      pagination: { page: args?.page ?? 0, perPage: args?.perPage ?? 20 }
    });
  }
  /**
   * Update an existing item in the dataset.
   */
  async updateItem(input) {
    const store = await this.#getDatasetsStore();
    return store.updateItem({
      id: input.itemId,
      datasetId: this.id,
      input: input.input,
      groundTruth: input.groundTruth,
      expectedTrajectory: input.expectedTrajectory,
      requestContext: input.requestContext,
      metadata: input.metadata
    });
  }
  /**
   * Delete a single item from the dataset.
   */
  async deleteItem(args) {
    const store = await this.#getDatasetsStore();
    return store.deleteItem({ id: args.itemId, datasetId: this.id });
  }
  /**
   * Delete multiple items from the dataset in bulk.
   */
  async deleteItems(args) {
    const store = await this.#getDatasetsStore();
    return store.batchDeleteItems({ datasetId: this.id, itemIds: args.itemIds });
  }
  // ---------------------------------------------------------------------------
  // Versioning
  // ---------------------------------------------------------------------------
  /**
   * List all versions of this dataset (defaults: page 0, 20 per page).
   */
  async listVersions(args) {
    const store = await this.#getDatasetsStore();
    return store.listDatasetVersions({
      datasetId: this.id,
      pagination: { page: args?.page ?? 0, perPage: args?.perPage ?? 20 }
    });
  }
  /**
   * Get full SCD-2 history of a specific item across all dataset versions.
   */
  async getItemHistory(args) {
    const store = await this.#getDatasetsStore();
    return store.getItemHistory(args.itemId);
  }
  // ---------------------------------------------------------------------------
  // Experiments
  // ---------------------------------------------------------------------------
  /**
   * Run an experiment on this dataset and wait for completion.
   * A `datasetId` present in `config` is ignored: the experiment always runs
   * against this handle's dataset.
   */
  async startExperiment(config) {
    // Spread first so the handle's own id cannot be overridden by the config.
    return runExperiment(this.#mastra, { ...config, datasetId: this.id });
  }
  /**
   * Start an experiment asynchronously (fire-and-forget).
   * Returns immediately with the experiment ID and pending status.
   *
   * The experiment record is created up front; the run itself continues in the
   * background. If the run rejects, the record is marked "failed" (best effort)
   * and the error is logged.
   *
   * @throws {MastraError} DATASET_NOT_FOUND when the dataset record is missing.
   * @throws {MastraError} EXPERIMENT_NO_ITEMS when the target version has no items.
   */
  async startExperimentAsync(config) {
    const experimentsStore = await this.#getExperimentsStore();
    const datasetsStore = await this.#getDatasetsStore();
    const dataset = await datasetsStore.getDatasetById({ id: this.id });
    if (!dataset) {
      throw new MastraError({
        id: "DATASET_NOT_FOUND",
        text: `Dataset not found: ${this.id}`,
        domain: "STORAGE",
        category: "USER"
      });
    }
    const targetVersion = config.version ?? dataset.version;
    const items = await datasetsStore.getItemsByVersion({
      datasetId: this.id,
      version: targetVersion
    });
    if (items.length === 0) {
      throw new MastraError({
        id: "EXPERIMENT_NO_ITEMS",
        text: `Cannot run experiment: dataset "${this.id}" has no items at version ${targetVersion}`,
        domain: "STORAGE",
        category: "USER"
      });
    }
    const run = await experimentsStore.createExperiment({
      datasetId: this.id,
      datasetVersion: targetVersion,
      targetType: config.targetType ?? "agent",
      targetId: config.targetId ?? "inline",
      totalItems: items.length,
      name: config.name,
      description: config.description,
      metadata: config.metadata,
      agentVersion: config.agentVersion
    });
    const experimentId = run.id;
    // Intentionally not awaited. The config is spread first so the runner always
    // works on the record created above, even if the config carries its own
    // datasetId / experimentId keys.
    void runExperiment(this.#mastra, {
      ...config,
      datasetId: this.id,
      experimentId,
      version: targetVersion
    }).catch(async (err) => {
      // Marking the record as failed is best effort; a storage error here must
      // not mask the original failure, which is logged below.
      await experimentsStore.updateExperiment({
        id: experimentId,
        status: "failed",
        completedAt: /* @__PURE__ */ new Date()
      }).catch(() => {
      });
      this.#mastra.getLogger()?.error(`Experiment ${experimentId} failed: ${err?.message ?? err}`);
    });
    return { experimentId, status: "pending", totalItems: items.length };
  }
  /**
   * List all experiments (runs) for this dataset (defaults: page 0, 20 per page).
   */
  async listExperiments(args) {
    const experimentsStore = await this.#getExperimentsStore();
    return experimentsStore.listExperiments({
      datasetId: this.id,
      pagination: { page: args?.page ?? 0, perPage: args?.perPage ?? 20 }
    });
  }
  /**
   * Get a specific experiment (run) by ID.
   */
  async getExperiment(args) {
    const experimentsStore = await this.#getExperimentsStore();
    return experimentsStore.getExperimentById({ id: args.experimentId });
  }
  /**
   * List results for a specific experiment (defaults: page 0, 20 per page).
   */
  async listExperimentResults(args) {
    const experimentsStore = await this.#getExperimentsStore();
    return experimentsStore.listExperimentResults({
      experimentId: args.experimentId,
      pagination: { page: args.page ?? 0, perPage: args.perPage ?? 20 }
    });
  }
  /**
   * Update an experiment result's status or tags.
   * The input is forwarded to the experiments store unchanged.
   */
  async updateExperimentResult(input) {
    const experimentsStore = await this.#getExperimentsStore();
    return experimentsStore.updateExperimentResult(input);
  }
  /**
   * Delete an experiment (run) by ID.
   */
  async deleteExperiment(args) {
    const experimentsStore = await this.#getExperimentsStore();
    return experimentsStore.deleteExperiment({ id: args.experimentId });
  }
};
var DatasetsManager = class {
#mastra;
#datasetsStore;
#experimentsStore;
constructor(mastra) {
this.#mastra = mastra;
}
// ---------------------------------------------------------------------------
// Lazy storage resolution
// ---------------------------------------------------------------------------
async #getDatasetsStore() {
if (this.#datasetsStore) return this.#datasetsStore;
const storage = this.#mastra.getStorage();
if (!storage) {
throw new MastraError({
id: "DATASETS_STORAGE_NOT_CONFIGURED",
text: "Storage not configured. Configure storage in Mastra instance.",
domain: "STORAGE",
category: "USER"
});
}
const store = await storage.getStore("datasets");
if (!store) {
throw new MastraError({
id: "DATASETS_STORE_NOT_AVAILABLE",
text: "Datasets store not available. Ensure your storage adapter provides a datasets domain.",
domain: "STORAGE",
category: "USER"
});
}
this.#datasetsStore = store;
return store;
}
async #getExperimentsStore() {
if (this.#experimentsStore) return this.#experimentsStore;
const storage = this.#mastra.getStorage();
if (!storage) {
throw new MastraError({
id: "DATASETS_STORAGE_NOT_CONFIGURED",
text: "Storage not configured. Configure storage in Mastra instance.",
domain: "STORAGE",
category: "USER"
});
}
const store = await storage.getStore("experiments");
if (!store) {
throw new MastraError({
id: "EXPERIMENTS_STORE_NOT_AVAILABLE",
text: "Experiments store not available. Ensure your storage adapter provides an experiments domain.",
domain: "STORAGE",
category: "USER"
});
}
this.#experimentsStore = store;
return store;
}
// ---------------------------------------------------------------------------
// Dataset CRUD
// ---------------------------------------------------------------------------
/**
* Create a new dataset.
* Zod schemas are automatically converted to JSON Schema.
*/
async create(input) {
const store = await this.#getDatasetsStore();
let { inputSchema, groundTruthSchema, ...rest } = input;
if (inputSchema !== void 0 && isZodType(inputSchema)) {
inputSchema = zodToJsonSchema(inputSchema);
}
if (groundTruthSchema !== void 0 && isZodType(groundTruthSchema)) {
groundTruthSchema = zodToJsonSchema(groundTruthSchema);
}
const result = await store.createDataset({
...rest,
inputSchema,
groundTruthSchema
});
return new Dataset(result.id, this.#mastra);
}
/**
* Get an existin