UNPKG

@mastra/core

Version:

Mastra is a framework for building AI-powered applications and agents with a modern TypeScript stack.

1,525 lines (1,519 loc) • 54.4 kB
'use strict'; var chunkCCTAEUR3_cjs = require('./chunk-CCTAEUR3.cjs'); var chunkACQ5CVFF_cjs = require('./chunk-ACQ5CVFF.cjs'); var chunkXD7CRAEV_cjs = require('./chunk-XD7CRAEV.cjs'); var chunk2E7FPUYL_cjs = require('./chunk-2E7FPUYL.cjs'); var chunkG3JYQ2UI_cjs = require('./chunk-G3JYQ2UI.cjs'); var chunk4U7ZLI36_cjs = require('./chunk-4U7ZLI36.cjs'); var chunkCAVARKYS_cjs = require('./chunk-CAVARKYS.cjs'); var schemaCompat = require('@mastra/schema-compat'); var zodToJson = require('@mastra/schema-compat/zod-to-json'); // src/datasets/experiment/executor.ts async function executeScorer(scorer, item) { try { const result = await scorer.run(item.input); const score = typeof result.score === "number" && !isNaN(result.score) ? result.score : null; if (score === null && result.score !== void 0) { console.warn(`Scorer ${scorer.id} returned invalid score: ${result.score}`); } return { output: { score, reason: typeof result.reason === "string" ? result.reason : null }, error: null, traceId: null // Scorers don't produce traces }; } catch (error) { return { output: null, error: { message: error instanceof Error ? error.message : String(error), stack: error instanceof Error ? error.stack : void 0 }, traceId: null }; } } async function executeTarget(target, targetType, item, options) { try { const signal = options?.signal; if (signal?.aborted) { throw signal.reason ?? new DOMException("The operation was aborted.", "AbortError"); } let executionPromise; switch (targetType) { case "agent": executionPromise = executeAgent( target, item, signal, options?.requestContext, options?.experimentId, options?.versions ); break; case "workflow": executionPromise = executeWorkflow(target, item, options?.requestContext); break; case "scorer": executionPromise = executeScorer(target, item); break; case "processor": throw new Error(`Target type '${targetType}' not yet supported.`); default: throw new Error(`Unknown target type: ${targetType}`); } if (signal) { return await raceWithSignal(executionPromise, signal); } return await executionPromise; } catch (error) { return { output: null, error: { message: error instanceof Error ? error.message : String(error), stack: error instanceof Error ? error.stack : void 0 }, traceId: null }; } } function raceWithSignal(promise, signal) { if (signal.aborted) { return Promise.reject(signal.reason ?? new DOMException("The operation was aborted.", "AbortError")); } return new Promise((resolve, reject) => { const onAbort = () => { reject(signal.reason ?? new DOMException("The operation was aborted.", "AbortError")); }; signal.addEventListener("abort", onAbort, { once: true }); promise.then( (value) => { signal.removeEventListener("abort", onAbort); resolve(value); }, (err) => { signal.removeEventListener("abort", onAbort); reject(err); } ); }); } async function executeAgent(agent, item, signal, requestContext, experimentId, versions) { const model = await agent.getModel(); const input = item.input; const reqCtx = requestContext ? new chunkCAVARKYS_cjs.RequestContext(Object.entries(requestContext)) : void 0; const tracingOptions = experimentId ? { metadata: { experimentId } } : void 0; const rawResult = chunkACQ5CVFF_cjs.isSupportedLanguageModel(model) ? await agent.generate(input, { scorers: {}, returnScorerData: true, abortSignal: signal, ...reqCtx ? { requestContext: reqCtx } : {}, ...tracingOptions ? { tracingOptions } : {}, ...versions ? { versions } : {} }) : await agent.generateLegacy(input, { scorers: {}, returnScorerData: true, ...reqCtx ? { requestContext: reqCtx } : {}, ...tracingOptions ? { tracingOptions } : {} }); const result = rawResult; const traceId = result.traceId ?? null; const scoringData = result.scoringData; const trimmedOutput = { text: result.text, object: result.object, toolCalls: result.toolCalls, toolResults: result.toolResults, sources: result.sources, files: result.files, usage: result.usage, reasoningText: result.reasoningText, traceId, error: result.error ?? null }; return { output: trimmedOutput, error: null, traceId, scorerInput: scoringData?.input, scorerOutput: scoringData?.output }; } async function executeWorkflow(workflow, item, requestContext) { const reqCtx = requestContext ? new chunkCAVARKYS_cjs.RequestContext(Object.entries(requestContext)) : void 0; const observabilityContext = chunk2E7FPUYL_cjs.resolveObservabilityContext({}); const run = await workflow.createRun({ disableScorers: true }); const result = await run.start({ inputData: item.input, ...reqCtx ? { requestContext: reqCtx } : {}, ...observabilityContext }); const traceId = result.traceId ?? null; const spanId = result.spanId ?? null; if (result.status === "success") { return { output: result.result, error: null, traceId, spanId, stepResults: result.steps, stepExecutionPath: result.stepExecutionPath }; } if (result.status === "failed") { return { output: null, error: { message: result.error?.message ?? "Workflow failed", stack: result.error?.stack }, traceId, spanId, stepResults: result.steps, stepExecutionPath: result.stepExecutionPath }; } if (result.status === "tripwire") { return { output: null, error: { message: `Workflow tripwire: ${result.tripwire?.reason ?? "Unknown reason"}` }, traceId, spanId, stepResults: result.steps, stepExecutionPath: result.stepExecutionPath }; } if (result.status === "suspended") { return { output: null, error: { message: "Workflow suspended - not yet supported in dataset experiments" }, traceId, spanId, stepResults: result.steps, stepExecutionPath: result.stepExecutionPath }; } if (result.status === "paused") { return { output: null, error: { message: "Workflow paused - not yet supported in dataset experiments" }, traceId, spanId, stepResults: result.steps, stepExecutionPath: result.stepExecutionPath }; } const _exhaustiveCheck = result; return { output: null, error: { message: `Workflow ended with unexpected status: ${_exhaustiveCheck.status}` }, traceId, spanId }; } // src/datasets/experiment/scorer.ts function toScorerTargetEntityType(targetType) { switch (targetType) { case "agent": return chunkG3JYQ2UI_cjs.EntityType.AGENT; case "workflow": return chunkG3JYQ2UI_cjs.EntityType.WORKFLOW_RUN; case "scorer": return chunkG3JYQ2UI_cjs.EntityType.SCORER; default: return void 0; } } function resolveScorers(mastra, scorers) { if (!scorers || scorers.length === 0) return []; return scorers.map((scorer) => { if (typeof scorer === "string") { const resolved = mastra.getScorerById(scorer); if (!resolved) { console.warn(`Scorer not found: ${scorer}`); return null; } return resolved; } return scorer; }).filter((s) => s !== null); } async function extractTrajectoryFromStorage(storage, traceId) { if (!storage || !traceId) return void 0; try { const observabilityStore = await storage.getStore("observability"); if (!observabilityStore) return void 0; const trace = await observabilityStore.getTrace({ traceId }); if (!trace?.spans?.length) return void 0; return chunkXD7CRAEV_cjs.extractTrajectoryFromTrace(trace.spans); } catch { return void 0; } } async function runScorersForItem(scorers, item, output, storage, runId, targetType, targetId, itemId, scorerInput, scorerOutput, traceId, workflowData) { if (scorers.length === 0) return []; const hasTrajectoryScorer = scorers.some((s) => s.type === "trajectory"); let trajectoryOutput; if (hasTrajectoryScorer) { const traceTrajectory = await extractTrajectoryFromStorage(storage, traceId); trajectoryOutput = traceTrajectory ?? (scorerOutput ? chunkXD7CRAEV_cjs.extractTrajectory(scorerOutput) : { steps: [] }); } const targetCorrelationContext = { ...traceId ? { traceId } : {}, entityType: toScorerTargetEntityType(targetType), entityId: targetId, entityName: targetId, experimentId: runId }; const settled = await Promise.allSettled( scorers.map(async (scorer) => { const { result, promptMetadata } = await runScorerSafe( scorer, item, output, scorerInput, scorerOutput, targetType, traceId, targetCorrelationContext, scorer.type === "trajectory" ? trajectoryOutput : void 0, workflowData ); if (storage && result.score !== null) { try { await chunkCCTAEUR3_cjs.validateAndSaveScore(storage, { scorerId: scorer.id, score: result.score, reason: result.reason ?? void 0, input: item.input, output, additionalContext: item.metadata, entityType: targetType.toUpperCase(), entityId: itemId, source: "TEST", runId, traceId, scorer: { id: scorer.id, name: scorer.name, description: scorer.description ?? "", hasJudge: !!scorer.judge }, entity: { id: targetId, name: targetId }, ...promptMetadata }); } catch (saveError) { console.warn(`Failed to save score for scorer ${scorer.id}:`, saveError); } } return result; }) ); return settled.map((s, i) => { if (s.status === "fulfilled") return s.value; const scorer = scorers[i]; return { scorerId: scorer.id, scorerName: scorer.name, score: null, reason: null, error: String(s.reason), targetScope: scorer.type === "trajectory" ? "trajectory" : "span" }; }); } async function runScorerSafe(scorer, item, output, scorerInput, scorerOutput, targetType, targetTraceId, targetCorrelationContext, trajectoryOutput, workflowData) { try { const effectiveOutput = trajectoryOutput ?? scorerOutput ?? output; const effectiveScope = trajectoryOutput ? "trajectory" : "span"; const targetMetadata = !trajectoryOutput && workflowData && (workflowData.stepResults || workflowData.stepExecutionPath) ? { ...workflowData.stepResults ? { stepResults: workflowData.stepResults } : {}, ...workflowData.stepExecutionPath ? { stepExecutionPath: workflowData.stepExecutionPath } : {} } : void 0; const scoreResult = await scorer.run({ input: scorerInput ?? item.input, output: effectiveOutput, groundTruth: item.groundTruth, scoreSource: "experiment", targetScope: effectiveScope, targetEntityType: toScorerTargetEntityType(targetType), targetTraceId, ...workflowData?.spanId ? { targetSpanId: workflowData.spanId } : {}, ...targetCorrelationContext ? { targetCorrelationContext } : {}, ...targetMetadata ? { targetMetadata } : {} }); if (typeof scoreResult !== "object" || scoreResult === null) { return { result: { scorerId: scorer.id, scorerName: scorer.name, score: null, reason: null, error: `Scorer ${scorer.name} (${scorer.id}) returned invalid result: expected object, got ${scoreResult === null ? "null" : typeof scoreResult} (${String(scoreResult)})` }, promptMetadata: {} }; } const fields = scoreResult; const score = typeof fields.score === "number" ? fields.score : null; const reason = typeof fields.reason === "string" ? fields.reason : null; const str = (key) => typeof fields[key] === "string" ? fields[key] : void 0; const obj = (key) => { const val = fields[key]; return typeof val === "object" && val !== null ? val : void 0; }; return { result: { scorerId: scorer.id, scorerName: scorer.name, score, reason, error: null, targetScope: effectiveScope }, promptMetadata: { generateScorePrompt: str("generateScorePrompt"), generateReasonPrompt: str("generateReasonPrompt"), preprocessStepResult: obj("preprocessStepResult"), preprocessPrompt: str("preprocessPrompt"), analyzeStepResult: obj("analyzeStepResult"), analyzePrompt: str("analyzePrompt") } }; } catch (error) { return { result: { scorerId: scorer.id, scorerName: scorer.name, score: null, reason: null, error: error instanceof Error ? error.message : String(error), targetScope: trajectoryOutput ? "trajectory" : "span" }, promptMetadata: {} }; } } function resolveStepScorers(mastra, stepsConfig) { if (!stepsConfig) return {}; const resolved = {}; for (const [stepId, scorers] of Object.entries(stepsConfig)) { const stepScorers = resolveScorers(mastra, scorers); if (stepScorers.length > 0) resolved[stepId] = stepScorers; } return resolved; } async function runStepScorersForItem(stepScorers, item, workflowData, storage, runId, targetType, targetId, itemId, traceId) { const stepIds = Object.keys(stepScorers); if (stepIds.length === 0) return []; const results = []; const stepResults = workflowData?.stepResults; for (const stepId of stepIds) { const scorers = stepScorers[stepId]; const stepResult = stepResults?.[stepId]; if (!stepResult || stepResult.status !== "success" || stepResult.output === void 0) { for (const scorer of scorers) { results.push({ scorerId: scorer.id, scorerName: scorer.name, score: null, reason: null, error: `Step "${stepId}" did not produce a successful output (status: ${stepResult?.status ?? "missing"})`, targetScope: "span", stepId }); } continue; } const stepInput = stepResult.payload !== void 0 ? stepResult.payload : item.input; const stepOutput = stepResult.output; const targetCorrelationContext = { ...traceId ? { traceId } : {}, entityType: chunkG3JYQ2UI_cjs.EntityType.WORKFLOW_STEP, entityId: stepId, entityName: stepId, experimentId: runId }; const settled = await Promise.allSettled( scorers.map(async (scorer) => { try { const scoreResult = await scorer.run({ input: stepInput, output: stepOutput, groundTruth: item.groundTruth, scoreSource: "experiment", targetScope: "span", targetEntityType: chunkG3JYQ2UI_cjs.EntityType.WORKFLOW_STEP, targetTraceId: traceId, ...targetCorrelationContext ? { targetCorrelationContext } : {} }); if (typeof scoreResult !== "object" || scoreResult === null) { return { scorerId: scorer.id, scorerName: scorer.name, score: null, reason: null, error: `Scorer ${scorer.name} (${scorer.id}) returned invalid result on step ${stepId}`, targetScope: "span", stepId }; } const fields = scoreResult; const score = typeof fields.score === "number" ? fields.score : null; const reason = typeof fields.reason === "string" ? fields.reason : null; if (storage && score !== null) { try { await chunkCCTAEUR3_cjs.validateAndSaveScore(storage, { scorerId: scorer.id, score, reason: reason ?? void 0, input: stepInput, output: stepOutput, additionalContext: { ...item.metadata, stepId }, entityType: "WORKFLOW_STEP", entityId: itemId, source: "TEST", runId, traceId, scorer: { id: scorer.id, name: scorer.name, description: scorer.description ?? "", hasJudge: !!scorer.judge }, entity: { id: targetId, name: targetId } }); } catch (saveError) { console.warn(`Failed to save score for step scorer ${scorer.id} on ${stepId}:`, saveError); } } return { scorerId: scorer.id, scorerName: scorer.name, score, reason, error: null, targetScope: "span", stepId }; } catch (error) { return { scorerId: scorer.id, scorerName: scorer.name, score: null, reason: null, error: error instanceof Error ? error.message : String(error), targetScope: "span", stepId }; } }) ); for (let i = 0; i < settled.length; i++) { const s = settled[i]; if (s.status === "fulfilled") { results.push(s.value); } else { const scorer = scorers[i]; results.push({ scorerId: scorer.id, scorerName: scorer.name, score: null, reason: null, error: String(s.reason), targetScope: "span", stepId }); } } } return results; } // src/datasets/experiment/analytics/aggregate.ts function computeMean(values) { if (values.length === 0) { return 0; } const sum = values.reduce((acc, val) => acc + val, 0); return sum / values.length; } function computeScorerStats(scores, passThreshold = 0.5) { const totalItems = scores.length; if (totalItems === 0) { return { errorRate: 0, errorCount: 0, passRate: 0, passCount: 0, avgScore: 0, scoreCount: 0, totalItems: 0 }; } const validScores = []; let errorCount = 0; for (const score of scores) { if (score.score === null || score.score === void 0) { errorCount++; } else { validScores.push(score.score); } } const scoreCount = validScores.length; const errorRate = errorCount / totalItems; const passCount = validScores.filter((s) => s >= passThreshold).length; const passRate = scoreCount > 0 ? passCount / scoreCount : 0; const avgScore = computeMean(validScores); return { errorRate, errorCount, passRate, passCount, avgScore, scoreCount, totalItems }; } function isRegression(delta, threshold, direction = "higher-is-better") { if (direction === "higher-is-better") { return delta < -threshold; } else { return delta > threshold; } } // src/datasets/experiment/analytics/compare.ts var DEFAULT_THRESHOLD = { value: 0, direction: "higher-is-better" }; var DEFAULT_PASS_THRESHOLD = 0.5; async function compareExperiments(mastra, config) { const { experimentIdA, experimentIdB, thresholds = {} } = config; const warnings = []; const storage = mastra.getStorage(); if (!storage) { throw new Error("Storage not configured. Configure storage in Mastra instance."); } const experimentsStore = await storage.getStore("experiments"); const scoresStore = await storage.getStore("scores"); if (!experimentsStore) { throw new Error("ExperimentsStorage not configured."); } if (!scoresStore) { throw new Error("ScoresStorage not configured."); } const [experimentA, experimentB] = await Promise.all([ experimentsStore.getExperimentById({ id: experimentIdA }), experimentsStore.getExperimentById({ id: experimentIdB }) ]); if (!experimentA) { throw new Error(`Experiment not found: ${experimentIdA}`); } if (!experimentB) { throw new Error(`Experiment not found: ${experimentIdB}`); } const versionMismatch = experimentA.datasetVersion !== experimentB.datasetVersion; if (versionMismatch) { warnings.push( `Experiments have different dataset versions: ${experimentA.datasetVersion} vs ${experimentB.datasetVersion}` ); } const [resultsA, resultsB] = await Promise.all([ experimentsStore.listExperimentResults({ experimentId: experimentIdA, pagination: { page: 0, perPage: false } }), experimentsStore.listExperimentResults({ experimentId: experimentIdB, pagination: { page: 0, perPage: false } }) ]); const [scoresA, scoresB] = await Promise.all([ scoresStore.listScoresByRunId({ runId: experimentIdA, pagination: { page: 0, perPage: false } }), scoresStore.listScoresByRunId({ runId: experimentIdB, pagination: { page: 0, perPage: false } }) ]); if (resultsA.results.length === 0 && resultsB.results.length === 0) { warnings.push("Both experiments have no results."); return buildEmptyResult(experimentA, experimentB, versionMismatch, warnings); } if (resultsA.results.length === 0) { warnings.push("Experiment A has no results."); } if (resultsB.results.length === 0) { warnings.push("Experiment B has no results."); } const itemIdsA = new Set(resultsA.results.map((r) => r.itemId)); const itemIdsB = new Set(resultsB.results.map((r) => r.itemId)); const overlappingItemIds = [...itemIdsA].filter((id) => itemIdsB.has(id)); if (overlappingItemIds.length === 0) { warnings.push("No overlapping items between experiments."); } const scoresMapA = groupScoresByScorerAndItem(scoresA.scores); const scoresMapB = groupScoresByScorerAndItem(scoresB.scores); const allScorerIds = /* @__PURE__ */ new Set([...Object.keys(scoresMapA), ...Object.keys(scoresMapB)]); const scorers = {}; let hasRegression = false; for (const scorerId of allScorerIds) { const scorerScoresA = scoresMapA[scorerId] ?? {}; const scorerScoresB = scoresMapB[scorerId] ?? {}; const scoresArrayA = Object.values(scorerScoresA); const scoresArrayB = Object.values(scorerScoresB); const thresholdConfig = thresholds[scorerId] ?? DEFAULT_THRESHOLD; const threshold = thresholdConfig.value; const direction = thresholdConfig.direction ?? "higher-is-better"; const statsA = computeScorerStats(scoresArrayA, DEFAULT_PASS_THRESHOLD); const statsB = computeScorerStats(scoresArrayB, DEFAULT_PASS_THRESHOLD); const delta = statsB.avgScore - statsA.avgScore; const regressed = isRegression(delta, threshold, direction); if (regressed) { hasRegression = true; } scorers[scorerId] = { statsA, statsB, delta, regressed, threshold }; } const allItemIds = /* @__PURE__ */ new Set([...itemIdsA, ...itemIdsB]); const items = []; for (const itemId of allItemIds) { const inBothExperiments = itemIdsA.has(itemId) && itemIdsB.has(itemId); const itemScoresA = {}; const itemScoresB = {}; for (const scorerId of allScorerIds) { const scoreA = scoresMapA[scorerId]?.[itemId]; const scoreB = scoresMapB[scorerId]?.[itemId]; itemScoresA[scorerId] = scoreA?.score ?? null; itemScoresB[scorerId] = scoreB?.score ?? null; } items.push({ itemId, inBothExperiments, scoresA: itemScoresA, scoresB: itemScoresB }); } return { experimentA: { id: experimentA.id, datasetVersion: experimentA.datasetVersion }, experimentB: { id: experimentB.id, datasetVersion: experimentB.datasetVersion }, versionMismatch, hasRegression, scorers, items, warnings }; } function groupScoresByScorerAndItem(scores) { const result = {}; for (const score of scores) { const scorerId = score.scorerId; const itemId = score.entityId; if (!result[scorerId]) { result[scorerId] = {}; } result[scorerId][itemId] = score; } return result; } function buildEmptyResult(experimentA, experimentB, versionMismatch, warnings) { return { experimentA: { id: experimentA.id, datasetVersion: experimentA.datasetVersion }, experimentB: { id: experimentB.id, datasetVersion: experimentB.datasetVersion }, versionMismatch, hasRegression: false, scorers: {}, items: [], warnings }; } // src/datasets/experiment/index.ts async function runExperiment(mastra, config) { const { datasetId, targetType, targetId, scorers: scorerInput, version, maxConcurrency = 5, signal, itemTimeout, maxRetries = 0, experimentId: providedExperimentId, name, description, metadata, requestContext: globalRequestContext, agentVersion, versions } = config; const startedAt = /* @__PURE__ */ new Date(); const experimentId = providedExperimentId ?? crypto.randomUUID(); const storage = mastra.getStorage(); const datasetsStore = await storage?.getStore("datasets"); const experimentsStore = await storage?.getStore("experiments"); const markFailedOnSetupError = async (err) => { if (providedExperimentId && experimentsStore) { try { await experimentsStore.updateExperiment({ id: experimentId, status: "failed", completedAt: /* @__PURE__ */ new Date() }); } catch (updateErr) { mastra.getLogger()?.error(`Failed to mark experiment ${experimentId} as failed: ${updateErr}`); } } throw err; }; let items; let datasetVersion; let datasetRecord; try { if (config.data) { const rawData = typeof config.data === "function" ? await config.data() : config.data; items = rawData.map((dataItem) => { const id = dataItem.id ?? crypto.randomUUID(); return { id, datasetVersion: null, input: dataItem.input, groundTruth: dataItem.groundTruth, metadata: dataItem.metadata }; }); datasetVersion = null; } else if (datasetId) { if (!datasetsStore) { throw new Error("DatasetsStorage not configured. Configure storage in Mastra instance."); } datasetRecord = await datasetsStore.getDatasetById({ id: datasetId }); if (!datasetRecord) { throw new chunk4U7ZLI36_cjs.MastraError({ id: "DATASET_NOT_FOUND", text: `Dataset not found: ${datasetId}`, domain: "STORAGE", category: "USER" }); } datasetVersion = version ?? datasetRecord.version; const versionItems = await datasetsStore.getItemsByVersion({ datasetId, version: datasetVersion }); if (versionItems.length === 0) { throw new chunk4U7ZLI36_cjs.MastraError({ id: "EXPERIMENT_NO_ITEMS", text: `No items in dataset ${datasetId} at version ${datasetVersion}`, domain: "STORAGE", category: "USER" }); } items = versionItems.map((v) => ({ id: v.id, datasetVersion: v.datasetVersion, input: v.input, groundTruth: v.groundTruth, requestContext: v.requestContext, metadata: v.metadata })); } else { throw new Error("No data source: provide datasetId or data"); } } catch (err) { await markFailedOnSetupError(err); throw err; } let execFn; try { if (config.task) { const taskFn = config.task; execFn = async (item, itemSignal) => { try { const result = await taskFn({ input: item.input, mastra, groundTruth: item.groundTruth, metadata: item.metadata, signal: itemSignal }); return { output: result, error: null, traceId: null }; } catch (err) { return { output: null, error: { message: err instanceof Error ? err.message : String(err), stack: err instanceof Error ? err.stack : void 0 }, traceId: null }; } }; } else if (targetType && targetId) { const resolved = await resolveTarget(mastra, targetType, targetId, agentVersion); if (!resolved) { throw new Error(`Target not found: ${targetType}/${targetId}`); } const { target } = resolved; execFn = (item, itemSignal) => { const mergedRequestContext = globalRequestContext || item.requestContext ? { ...globalRequestContext, ...item.requestContext } : void 0; return executeTarget(target, targetType, item, { signal: itemSignal, requestContext: mergedRequestContext, experimentId, versions }); }; } else { throw new Error("No task: provide targetType+targetId or task"); } } catch (err) { await markFailedOnSetupError(err); throw err; } let stepsConfigInput; const flatScorerInput = (() => { if (!scorerInput) return void 0; if (Array.isArray(scorerInput)) return scorerInput; const flat = []; if ("agent" in scorerInput && scorerInput.agent) flat.push(...scorerInput.agent); if ("workflow" in scorerInput && scorerInput.workflow) flat.push(...scorerInput.workflow); if ("trajectory" in scorerInput && scorerInput.trajectory) flat.push(...scorerInput.trajectory); if ("steps" in scorerInput && scorerInput.steps) { stepsConfigInput = scorerInput.steps; } return flat; })(); let mergedScorerInput = flatScorerInput; const datasetScorerIds = datasetRecord?.scorerIds ?? []; if (datasetScorerIds.length > 0) { mergedScorerInput = [...flatScorerInput ?? [], ...datasetScorerIds]; } if (mergedScorerInput && mergedScorerInput.length > 0) { const seen = /* @__PURE__ */ new Set(); mergedScorerInput = mergedScorerInput.filter((entry) => { if (typeof entry === "string") { if (seen.has(entry)) return false; seen.add(entry); return true; } return true; }); } const scorers = resolveScorers(mastra, mergedScorerInput); const stepScorers = resolveStepScorers(mastra, stepsConfigInput); if (experimentsStore) { if (!providedExperimentId) { await experimentsStore.createExperiment({ id: experimentId, name, description, metadata, datasetId: datasetId ?? null, datasetVersion, targetType: targetType ?? "agent", targetId: targetId ?? "inline", totalItems: items.length, agentVersion }); } await experimentsStore.updateExperiment({ id: experimentId, status: "running", totalItems: items.length, startedAt }); } let succeededCount = 0; let failedCount = 0; const results = new Array(items.length); const PROGRESS_UPDATE_INTERVAL = 2e3; let lastProgressUpdate = 0; try { const pMap = (await import('p-map')).default; await pMap( items.map((item, idx) => ({ item, idx })), async ({ item, idx }) => { if (signal?.aborted) { throw new DOMException("Aborted", "AbortError"); } const itemStartedAt = /* @__PURE__ */ new Date(); let itemSignal = signal; if (itemTimeout) { const timeoutSignal = AbortSignal.timeout(itemTimeout); itemSignal = signal ? AbortSignal.any([signal, timeoutSignal]) : timeoutSignal; } let retryCount = 0; let execResult = await execFn(item, itemSignal); while (execResult.error && retryCount < maxRetries) { if (execResult.error.message.toLowerCase().includes("abort")) break; retryCount++; const delay = Math.min(1e3 * Math.pow(2, retryCount - 1), 3e4); const jitter = delay * 0.2 * Math.random(); await new Promise((r) => setTimeout(r, delay + jitter)); if (signal?.aborted) { throw new DOMException("Aborted", "AbortError"); } execResult = await execFn(item, itemSignal); } const itemCompletedAt = /* @__PURE__ */ new Date(); if (execResult.error) { failedCount++; } else { succeededCount++; } const itemResult = { itemId: item.id, itemVersion: item.datasetVersion ?? 0, input: item.input, output: execResult.output, groundTruth: item.groundTruth ?? null, error: execResult.error, startedAt: itemStartedAt, completedAt: itemCompletedAt, retryCount }; const workflowData = execResult.stepResults || execResult.stepExecutionPath ? { stepResults: execResult.stepResults, stepExecutionPath: execResult.stepExecutionPath, spanId: execResult.spanId } : void 0; const flatScores = await runScorersForItem( scorers, item, execResult.output, storage ?? null, experimentId, targetType ?? "agent", targetId ?? "inline", item.id, execResult.scorerInput, execResult.scorerOutput, execResult.traceId ?? void 0, workflowData ); const stepScores = await runStepScorersForItem( stepScorers, item, workflowData, storage ?? null, experimentId, targetType ?? "agent", targetId ?? "inline", item.id, execResult.traceId ?? void 0 ); const itemScores = [...flatScores, ...stepScores]; if (experimentsStore) { try { await experimentsStore.addExperimentResult({ experimentId, itemId: item.id, itemDatasetVersion: item.datasetVersion, input: item.input, output: execResult.output, groundTruth: item.groundTruth ?? null, error: execResult.error, startedAt: itemStartedAt, completedAt: itemCompletedAt, retryCount, traceId: execResult.traceId }); } catch (persistError) { console.warn(`Failed to persist result for item ${item.id}:`, persistError); } const now = Date.now(); if (now - lastProgressUpdate >= PROGRESS_UPDATE_INTERVAL) { lastProgressUpdate = now; try { await experimentsStore.updateExperiment({ id: experimentId, succeededCount, failedCount }); } catch { } } } results[idx] = { ...itemResult, scores: itemScores }; }, { concurrency: maxConcurrency } ); } catch { const completedAt2 = /* @__PURE__ */ new Date(); const skippedCount2 = items.length - succeededCount - failedCount; if (experimentsStore) { await experimentsStore.updateExperiment({ id: experimentId, status: "failed", succeededCount, failedCount, skippedCount: skippedCount2, completedAt: completedAt2 }); } return { experimentId, status: "failed", totalItems: items.length, succeededCount, failedCount, skippedCount: skippedCount2, completedWithErrors: false, startedAt, completedAt: completedAt2, results: results.filter(Boolean) }; } const completedAt = /* @__PURE__ */ new Date(); const status = failedCount === items.length ? "failed" : "completed"; const completedWithErrors = status === "completed" && failedCount > 0; const skippedCount = items.length - succeededCount - failedCount; if (experimentsStore) { await experimentsStore.updateExperiment({ id: experimentId, status, succeededCount, failedCount, skippedCount, completedAt }); } return { experimentId, status, totalItems: items.length, succeededCount, failedCount, skippedCount, completedWithErrors, startedAt, completedAt, results }; } async function resolveTarget(mastra, targetType, targetId, agentVersion) { let resolved = null; switch (targetType) { case "agent": try { if (agentVersion) { resolved = await mastra.getAgentById(targetId, { versionId: agentVersion }); } else { resolved = mastra.getAgentById(targetId); } } catch { try { if (agentVersion) { resolved = await mastra.getAgent(targetId, { versionId: agentVersion }); } else { resolved = mastra.getAgent(targetId); } } catch { } } break; case "workflow": try { resolved = mastra.getWorkflowById(targetId); } catch { try { resolved = mastra.getWorkflow(targetId); } catch { } } break; case "scorer": try { resolved = mastra.getScorerById(targetId) ?? null; } catch { } break; } return resolved ? { target: resolved } : null; } var Dataset = class { id; #mastra; #datasetsStore; #experimentsStore; constructor(id, mastra) { this.id = id; this.#mastra = mastra; } // --------------------------------------------------------------------------- // Lazy storage resolution // --------------------------------------------------------------------------- async #getDatasetsStore() { if (this.#datasetsStore) return this.#datasetsStore; const storage = this.#mastra.getStorage(); if (!storage) { throw new chunk4U7ZLI36_cjs.MastraError({ id: "DATASETS_STORAGE_NOT_CONFIGURED", text: "Storage not configured. Configure storage in Mastra instance.", domain: "STORAGE", category: "USER" }); } const store = await storage.getStore("datasets"); if (!store) { throw new chunk4U7ZLI36_cjs.MastraError({ id: "DATASETS_STORE_NOT_AVAILABLE", text: "Datasets store not available. Ensure your storage adapter provides a datasets domain.", domain: "STORAGE", category: "USER" }); } this.#datasetsStore = store; return store; } async #getExperimentsStore() { if (this.#experimentsStore) return this.#experimentsStore; const storage = this.#mastra.getStorage(); if (!storage) { throw new chunk4U7ZLI36_cjs.MastraError({ id: "DATASETS_STORAGE_NOT_CONFIGURED", text: "Storage not configured. Configure storage in Mastra instance.", domain: "STORAGE", category: "USER" }); } const store = await storage.getStore("experiments"); if (!store) { throw new chunk4U7ZLI36_cjs.MastraError({ id: "EXPERIMENTS_STORE_NOT_AVAILABLE", text: "Experiments store not available. Ensure your storage adapter provides an experiments domain.", domain: "STORAGE", category: "USER" }); } this.#experimentsStore = store; return store; } // --------------------------------------------------------------------------- // Dataset metadata // --------------------------------------------------------------------------- /** * Get the full dataset record from storage. */ async getDetails() { const store = await this.#getDatasetsStore(); const record = await store.getDatasetById({ id: this.id }); if (!record) { throw new chunk4U7ZLI36_cjs.MastraError({ id: "DATASET_NOT_FOUND", text: `Dataset not found: ${this.id}`, domain: "STORAGE", category: "USER" }); } return record; } /** * Update dataset metadata and/or schemas. * Zod schemas are automatically converted to JSON Schema. */ async update(input) { const store = await this.#getDatasetsStore(); let { inputSchema, groundTruthSchema, ...rest } = input; if (inputSchema !== void 0 && inputSchema !== null && schemaCompat.isZodType(inputSchema)) { inputSchema = zodToJson.zodToJsonSchema(inputSchema); } if (groundTruthSchema !== void 0 && groundTruthSchema !== null && schemaCompat.isZodType(groundTruthSchema)) { groundTruthSchema = zodToJson.zodToJsonSchema(groundTruthSchema); } return store.updateDataset({ id: this.id, ...rest, inputSchema, groundTruthSchema }); } // --------------------------------------------------------------------------- // Item CRUD // --------------------------------------------------------------------------- /** * Add a single item to the dataset. */ async addItem(input) { const store = await this.#getDatasetsStore(); return store.addItem({ datasetId: this.id, input: input.input, groundTruth: input.groundTruth, expectedTrajectory: input.expectedTrajectory, requestContext: input.requestContext, metadata: input.metadata, source: input.source }); } /** * Add multiple items to the dataset in bulk. */ async addItems(input) { const store = await this.#getDatasetsStore(); return store.batchInsertItems({ datasetId: this.id, items: input.items }); } /** * Get a single item by ID, optionally at a specific version. */ async getItem(args) { const store = await this.#getDatasetsStore(); return store.getItemById({ id: args.itemId, datasetVersion: args.version }); } /** * List items in the dataset, optionally at a specific version. */ async listItems(args) { const store = await this.#getDatasetsStore(); if (args?.version) { return store.getItemsByVersion({ datasetId: this.id, version: args.version }); } return store.listItems({ datasetId: this.id, search: args?.search, pagination: { page: args?.page ?? 0, perPage: args?.perPage ?? 20 } }); } /** * Update an existing item in the dataset. */ async updateItem(input) { const store = await this.#getDatasetsStore(); return store.updateItem({ id: input.itemId, datasetId: this.id, input: input.input, groundTruth: input.groundTruth, expectedTrajectory: input.expectedTrajectory, requestContext: input.requestContext, metadata: input.metadata }); } /** * Delete a single item from the dataset. */ async deleteItem(args) { const store = await this.#getDatasetsStore(); return store.deleteItem({ id: args.itemId, datasetId: this.id }); } /** * Delete multiple items from the dataset in bulk. */ async deleteItems(args) { const store = await this.#getDatasetsStore(); return store.batchDeleteItems({ datasetId: this.id, itemIds: args.itemIds }); } // --------------------------------------------------------------------------- // Versioning // --------------------------------------------------------------------------- /** * List all versions of this dataset. */ async listVersions(args) { const store = await this.#getDatasetsStore(); return store.listDatasetVersions({ datasetId: this.id, pagination: { page: args?.page ?? 0, perPage: args?.perPage ?? 20 } }); } /** * Get full SCD-2 history of a specific item across all dataset versions. */ async getItemHistory(args) { const store = await this.#getDatasetsStore(); return store.getItemHistory(args.itemId); } // --------------------------------------------------------------------------- // Experiments // --------------------------------------------------------------------------- /** * Run an experiment on this dataset and wait for completion. */ async startExperiment(config) { return runExperiment(this.#mastra, { datasetId: this.id, ...config }); } /** * Start an experiment asynchronously (fire-and-forget). * Returns immediately with the experiment ID and pending status. */ async startExperimentAsync(config) { const experimentsStore = await this.#getExperimentsStore(); const datasetsStore = await this.#getDatasetsStore(); const dataset = await datasetsStore.getDatasetById({ id: this.id }); if (!dataset) { throw new chunk4U7ZLI36_cjs.MastraError({ id: "DATASET_NOT_FOUND", text: `Dataset not found: ${this.id}`, domain: "STORAGE", category: "USER" }); } const targetVersion = config.version ?? dataset.version; const items = await datasetsStore.getItemsByVersion({ datasetId: this.id, version: targetVersion }); if (items.length === 0) { throw new chunk4U7ZLI36_cjs.MastraError({ id: "EXPERIMENT_NO_ITEMS", text: `Cannot run experiment: dataset "${this.id}" has no items at version ${targetVersion}`, domain: "STORAGE", category: "USER" }); } const run = await experimentsStore.createExperiment({ datasetId: this.id, datasetVersion: targetVersion, targetType: config.targetType ?? "agent", targetId: config.targetId ?? "inline", totalItems: items.length, name: config.name, description: config.description, metadata: config.metadata, agentVersion: config.agentVersion }); const experimentId = run.id; void runExperiment(this.#mastra, { datasetId: this.id, experimentId, ...config, version: targetVersion }).catch(async (err) => { await experimentsStore.updateExperiment({ id: experimentId, status: "failed", completedAt: /* @__PURE__ */ new Date() }).catch(() => { }); this.#mastra.getLogger()?.error(`Experiment ${experimentId} failed: ${err?.message ?? err}`); }); return { experimentId, status: "pending", totalItems: items.length }; } /** * List all experiments (runs) for this dataset. */ async listExperiments(args) { const experimentsStore = await this.#getExperimentsStore(); return experimentsStore.listExperiments({ datasetId: this.id, pagination: { page: args?.page ?? 0, perPage: args?.perPage ?? 20 } }); } /** * Get a specific experiment (run) by ID. */ async getExperiment(args) { const experimentsStore = await this.#getExperimentsStore(); return experimentsStore.getExperimentById({ id: args.experimentId }); } /** * List results for a specific experiment. */ async listExperimentResults(args) { const experimentsStore = await this.#getExperimentsStore(); return experimentsStore.listExperimentResults({ experimentId: args.experimentId, pagination: { page: args?.page ?? 0, perPage: args?.perPage ?? 20 } }); } /** * Delete an experiment (run) by ID. */ /** * Update an experiment result's status or tags. */ async updateExperimentResult(input) { const experimentsStore = await this.#getExperimentsStore(); return experimentsStore.updateExperimentResult(input); } async deleteExperiment(args) { const experimentsStore = await this.#getExperimentsStore(); return experimentsStore.deleteExperiment({ id: args.experimentId }); } }; var DatasetsManager = class { #mastra; #datasetsStore; #experimentsStore; constructor(mastra) { this.#mastra = mastra; } // --------------------------------------------------------------------------- // Lazy storage resolution // --------------------------------------------------------------------------- async #getDatasetsStore() { if (this.#datasetsStore) return this.#datasetsStore; const storage = this.#mastra.getStorage(); if (!storage) { throw new chunk4U7ZLI36_cjs.MastraError({ id: "DATASETS_STORAGE_NOT_CONFIGURED", text: "Storage not configured. Configure storage in Mastra instance.", domain: "STORAGE", category: "USER" }); } const store = await storage.getStore("datasets"); if (!store) { throw new chunk4U7ZLI36_cjs.MastraError({ id: "DATASETS_STORE_NOT_AVAILABLE", text: "Datasets store not available. Ensure your storage adapter provides a datasets domain.", domain: "STORAGE", category: "USER" }); } this.#datasetsStore = store; return store; } async #getExperimentsStore() { if (this.#experimentsStore) return this.#experimentsStore; const storage = this.#mastra.getStorage(); if (!storage) { throw new chunk4U7ZLI36_cjs.MastraError({ id: "DATASETS_STORAGE_NOT_CONFIGURED", text: "Storage not configured. Configure storage in Mastra instance.", domain: "STORAGE", category: "USER" }); } const store = await storage.getStore("experiments"); if (!store) { throw new chunk4U7ZLI36_cjs.MastraError({ id: "EXPERIMENTS_STORE_NOT_AVAILABLE", text: "Experiments store not available. Ensure your storage adapter provides an experiments domain.", domain: "STORAGE", category: "USER" }); } this.#experimentsStore = store; return store; } // --------------------------------------------------------------------------- // Dataset CRUD // --------------------------------------------------------------------------- /** * Create a new dataset. * Zod schemas are automatically converted to JSON Schema. */ async create(input) { const store = await this.#getDatasetsStore(); let { inputS