UNPKG

@maximai/maxim-js

Version:

Maxim AI JS SDK. Visit https://getmaxim.ai for more info.

629 lines 29 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.runOutputFunction = runOutputFunction;
exports.runOutputFunctionWithTracing = runOutputFunctionWithTracing;
exports.runLocalEvaluations = runLocalEvaluations;
exports.workflowIdOutputFunctionClosure = workflowIdOutputFunctionClosure;
exports.promptVersionIdOutputFunctionClosure = promptVersionIdOutputFunctionClosure;
exports.promptChainVersionIdOutputFunctionClosure = promptChainVersionIdOutputFunctionClosure;
exports.simulationPromptVersionIdOutputFunctionClosure = simulationPromptVersionIdOutputFunctionClosure;
exports.simulationWorkflowIdOutputFunctionClosure = simulationWorkflowIdOutputFunctionClosure;
exports.simulationYieldsOutputFunctionClosure = simulationYieldsOutputFunctionClosure;

// Seconds to wait between two simulation status polls.
const SIMULATION_POLLING_INTERVAL_SECONDS = 5;

/**
 * Returns `value` unless it is null/undefined, in which case `fallback` is returned.
 * (Spelled out because this file does not use the `??` operator.)
 */
function valueOrDefault(value, fallback) {
    return value !== null && value !== undefined ? value : fallback;
}

/**
 * Returns `value` when it is a finite number, otherwise 0.
 * Used so that a missing usage/cost field does not turn a running total into NaN.
 */
function numberOrZero(value) {
    return typeof value === "number" && Number.isFinite(value) ? value : 0;
}

/**
 * Produces a printable description of a thrown value.
 * Error instances yield their message; anything else is JSON-stringified, and
 * if that throws (e.g. cyclic structures, BigInt) the value is coerced with String().
 */
function describeError(err) {
    if (err instanceof Error) {
        return err.message;
    }
    try {
        return JSON.stringify(err);
    }
    catch (stringifyError) {
        return String(err);
    }
}

/**
 * Best-effort logging: calls `logger[level]` when it is a function, otherwise
 * `logger.info`. A missing logger or a throwing logger is ignored on purpose so
 * that logging can never replace the error or result the caller is waiting for.
 */
function logBestEffort(logger, level, message) {
    if (!logger) {
        return;
    }
    try {
        if (typeof logger[level] === "function") {
            logger[level](message);
        }
        else if (typeof logger.info === "function") {
            logger.info(message);
        }
    }
    catch (loggingError) {
        // Intentionally ignored: logging is not allowed to fail the run.
    }
}

/**
 * Calls the user supplied output function for one data entry.
 * @param outputFunction - Function invoked as `outputFunction(dataEntry)`
 * @param dataEntry - The data entry handed to the output function
 * @returns Whatever the output function resolves to
 * @throws Error with message "Error while running output function"; the original error is attached as `cause`
 */
async function runOutputFunction(outputFunction, dataEntry) {
    try {
        return await outputFunction(dataEntry);
    }
    catch (err) {
        throw new Error(`Error while running output function`, { cause: err });
    }
}

/**
 * Same as runOutputFunction, but also forwards a trace id as second argument.
 * @param outputFunction - Function invoked as `outputFunction(dataEntry, traceId)`
 * @param dataEntry - The data entry handed to the output function
 * @param traceId - Trace id forwarded unchanged
 * @returns Whatever the output function resolves to
 * @throws Error with message "Error while running output function"; the original error is attached as `cause`
 */
async function runOutputFunctionWithTracing(outputFunction, dataEntry, traceId) {
    try {
        return await outputFunction(dataEntry, traceId);
    }
    catch (err) {
        throw new Error(`Error while running output function`, { cause: err });
    }
}

/**
 * Builds the result record reported for an evaluator that could not produce a score.
 */
function buildErrorEvaluation(name, passFailCriteria, output, reasoning) {
    return {
        name,
        passFailCriteria,
        output: output.data,
        simulationOutputs: output.simulationOutputs,
        result: { score: "Err", reasoning },
    };
}

/**
 * Runs one evaluator (single or combined) and returns its result records.
 * Errors thrown by the evaluation function are converted to "Err" records;
 * errors thrown by a variable mapping function are rethrown (with `cause`).
 */
async function runSingleLocalEvaluator(evaluator, dataEntry, output, contextToEvaluate) {
    const evaluationResultArgs = {
        output: output.data,
        contextToEvaluate,
        simulationOutputs: output.simulationOutputs,
    };
    // Variables are built separately from the fixed arguments above.
    const variables = {};
    if (evaluator.variableMapping) {
        for (const [key, mappingFn] of Object.entries(evaluator.variableMapping)) {
            let mappedValue;
            try {
                mappedValue = mappingFn(output, dataEntry);
            }
            catch (error) {
                throw new Error(`Error in variable mapping for key "${key}": ${error instanceof Error ? error.message : String(error)}`, { cause: error });
            }
            // Mappings returning undefined are left out of the variables object.
            if (mappedValue !== undefined) {
                variables[key] = mappedValue;
            }
        }
    }
    const evaluatorOutput = output.data;
    const simulationOutputs = output.simulationOutputs;
    if ("names" in evaluator) {
        // Combined evaluator: one call yields an object keyed by evaluator name.
        try {
            const results = await evaluator.evaluationFunction(evaluationResultArgs, { ...dataEntry }, variables);
            return Object.entries(results).map(([evaluatorName, result]) => {
                const passFailCriteria = evaluator.passFailCriteria[evaluatorName];
                if (!evaluator.names.includes(evaluatorName)) {
                    return buildErrorEvaluation(evaluatorName, passFailCriteria, output, `No name found for "${evaluatorName}" in combined evaluator with names ${evaluator.names}`);
                }
                if (!passFailCriteria) {
                    return buildErrorEvaluation(evaluatorName, passFailCriteria, output, `No pass fail criteria found with name "${evaluatorName}" for combined evaluator with names ${evaluator.names}`);
                }
                return {
                    name: evaluatorName,
                    passFailCriteria,
                    output: evaluatorOutput,
                    simulationOutputs,
                    result,
                };
            });
        }
        catch (err) {
            const reasoning = `Error while running combined evaluator with names ${evaluator.names}: ${describeError(err)}`;
            return evaluator.names.map((name) => buildErrorEvaluation(name, evaluator.passFailCriteria[name], output, reasoning));
        }
    }
    try {
        const result = await evaluator.evaluationFunction(evaluationResultArgs, { ...dataEntry }, variables);
        return [
            {
                name: evaluator.name,
                passFailCriteria: evaluator.passFailCriteria,
                output: evaluatorOutput,
                simulationOutputs,
                result,
            },
        ];
    }
    catch (err) {
        return [
            buildErrorEvaluation(evaluator.name, evaluator.passFailCriteria, output, `Error while running evaluator "${evaluator.name}": ${describeError(err)}`),
        ];
    }
}

/**
 * Runs local evaluations on the data entry.
 * All evaluators are started together; if any of them rejects (for example
 * because a variable mapping threw), every evaluator is reported with an "Err"
 * score carrying the same overall error message.
 * @param evaluators - The evaluators to run
 * @param dataEntry - The data entry to evaluate
 * @param output - The output of the run (`data` and `simulationOutputs` are read)
 * @param contextToEvaluate - The context to evaluate
 * @returns The results of the evaluations, flattened into one array
 */
async function runLocalEvaluations(evaluators, dataEntry, output, contextToEvaluate) {
    try {
        const perEvaluator = await Promise.all(evaluators.map((evaluator) => runSingleLocalEvaluator(evaluator, dataEntry, output, contextToEvaluate)));
        return perEvaluator.flat();
    }
    catch (err) {
        const reasoning = `Error while running local evaluators overall: ${describeError(err)}`;
        return evaluators
            .map((evaluator) => {
            if ("names" in evaluator) {
                return evaluator.names.map((name) => buildErrorEvaluation(name, evaluator.passFailCriteria[name], output, reasoning));
            }
            return [buildErrorEvaluation(evaluator.name, evaluator.passFailCriteria, output, reasoning)];
        })
            .flat();
    }
}

/**
 * Creates an output function that executes a workflow through the API service.
 * @param workflowId - Id forwarded to `executeWorkflowForData`
 * @param TestRunAPIService - Service object providing `executeWorkflowForData`
 * @param contextToEvaluate - Forwarded unchanged to the service
 * @returns Async function `(data) => output`; `data` defaults to "" when the service returns no output
 */
function workflowIdOutputFunctionClosure(workflowId, TestRunAPIService, contextToEvaluate) {
    return async (data) => {
        const result = await TestRunAPIService.executeWorkflowForData({
            dataEntry: data,
            workflowId,
            contextToEvaluate,
        });
        return {
            data: valueOrDefault(result.output, ""),
            retrievedContextToEvaluate: result.contextToEvaluate,
            messages: result.messages,
            meta: {
                usage: {
                    latency: result.latency,
                },
            },
        };
    };
}

/**
 * Creates an output function that executes a prompt version through the API service.
 * @param promptVersionId - Id forwarded to `executePromptForData`
 * @param input - Forwarded unchanged to the service
 * @param TestRunAPIService - Service object providing `executePromptForData`
 * @param contextToEvaluate - Forwarded unchanged to the service
 * @param simulationConfig - Forwarded unchanged to the service
 * @returns Async function `(data) => output`; `data` defaults to "" when the service returns no output
 */
function promptVersionIdOutputFunctionClosure(promptVersionId, input, TestRunAPIService, contextToEvaluate, simulationConfig) {
    return async (data) => {
        const result = await TestRunAPIService.executePromptForData({
            dataEntry: data,
            input,
            promptVersionId,
            contextToEvaluate,
            simulationConfig,
        });
        return {
            data: valueOrDefault(result.output, ""),
            retrievedContextToEvaluate: result.contextToEvaluate,
            messages: result.messages,
            meta: {
                usage: result.usage,
                cost: result.cost,
            },
        };
    };
}

/**
 * Creates an output function that executes a prompt chain version through the API service.
 * @param promptChainVersionId - Id forwarded to `executePromptChainForData`
 * @param input - Forwarded unchanged to the service
 * @param TestRunAPIService - Service object providing `executePromptChainForData`
 * @param contextToEvaluate - Forwarded unchanged to the service
 * @returns Async function `(data) => output`; `data` defaults to "" when the service returns no output
 */
function promptChainVersionIdOutputFunctionClosure(promptChainVersionId, input, TestRunAPIService, contextToEvaluate) {
    return async (data) => {
        const result = await TestRunAPIService.executePromptChainForData({
            dataEntry: data,
            input,
            promptChainVersionId,
            contextToEvaluate,
        });
        return {
            data: valueOrDefault(result.output, ""),
            retrievedContextToEvaluate: result.contextToEvaluate,
            messages: result.messages,
            meta: {
                usage: result.usage,
                cost: result.cost,
            },
        };
    };
}

/**
 * Shared polling loop behind the prompt and workflow status pollers.
 * Resolves with the status payload when the status is "COMPLETE", "STOPPED" or
 * missing; throws when it is "FAILED" or when more than
 * ceil(round(timeoutInMinutes) * 60 / pollingInterval) polls were needed.
 * @param fetchStatus - Zero-argument async function returning the status payload
 * @param testRunEntryId - Only used in error messages
 * @param pollingInterval - Seconds to wait between polls
 * @param timeoutInMinutes - Timeout budget; rounded to whole minutes
 */
async function pollSimulationStatus(fetchStatus, testRunEntryId, pollingInterval, timeoutInMinutes) {
    const timeoutMinutes = Math.round(timeoutInMinutes);
    const maxIterations = Math.ceil((timeoutMinutes * 60) / pollingInterval);
    let pollCount = 0;
    while (true) {
        pollCount++;
        const statusResult = await fetchStatus();
        const status = statusResult.status;
        if (status === "COMPLETE" || status === "STOPPED") {
            return statusResult;
        }
        if (status === "FAILED") {
            throw new Error(`Simulation failed for testRunEntryId: ${testRunEntryId}`);
        }
        // A payload without status is handed back as-is instead of being polled again.
        if (!status) {
            return statusResult;
        }
        if (pollCount > maxIterations) {
            throw new Error(`Simulation is taking over timeout period (${timeoutMinutes} minutes) to complete for testRunEntryId: ${testRunEntryId}`);
        }
        await new Promise((resolve) => setTimeout(resolve, pollingInterval * 1000));
    }
}

/**
 * Polls the simulation status endpoint until completion, failure, or timeout.
 * Uses the same timeout as post-push polling (timeoutInMinutes).
 */
async function pollSimulationPromptStatus(TestRunAPIService, workspaceId, testRunEntryId, pollingInterval, timeoutInMinutes) {
    return pollSimulationStatus(() => TestRunAPIService.getSimulationPromptStatus({ workspaceId, testRunEntryId }), testRunEntryId, pollingInterval, timeoutInMinutes);
}

/**
 * Workflow counterpart of pollSimulationPromptStatus; identical rules, but the
 * status is read through `getSimulationWorkflowStatus`.
 */
async function pollSimulationWorkflowStatus(TestRunAPIService, workspaceId, testRunEntryId, pollingInterval, timeoutInMinutes) {
    return pollSimulationStatus(() => TestRunAPIService.getSimulationWorkflowStatus({ workspaceId, testRunEntryId }), testRunEntryId, pollingInterval, timeoutInMinutes);
}

/**
 * Builds the `entry` payload sent when a simulation is started.
 * Missing (null/undefined) optional fields are sent as explicit null.
 */
function buildSimulationEntry(input, scenario, expectedSteps, contextToEvaluate, data) {
    return {
        input: valueOrDefault(input, null),
        scenario: valueOrDefault(scenario, null),
        expectedSteps: valueOrDefault(expectedSteps, null),
        contextToEvaluate: valueOrDefault(contextToEvaluate, null),
        dataEntry: data,
    };
}

/**
 * Creates an output function that starts a prompt simulation and polls it to completion.
 * @returns Async function `(data) => output`. `data` is the last simulation
 * output, or "" when the simulation returned no outputs.
 * @throws Propagates service errors and the polling errors (failed / timed out simulation)
 */
function simulationPromptVersionIdOutputFunctionClosure(testRunId, promptVersionId, workspaceId, scenario, TestRunAPIService, simulationConfig, contextToEvaluate, datasetEntryId, input, expectedSteps, timeoutInMinutes = 15) {
    return async (data) => {
        // Step 1: Call POST endpoint to start simulation
        const postResult = await TestRunAPIService.executeSimulationPromptForData({
            testRunId,
            promptVersionId,
            workspaceId,
            datasetEntryId,
            entry: buildSimulationEntry(input, scenario, expectedSteps, contextToEvaluate, data),
            simulationConfig,
        });
        // Step 2: Poll GET endpoint until completion or timeout
        const result = await pollSimulationPromptStatus(TestRunAPIService, postResult.workspaceId, postResult.testRunEntryId, SIMULATION_POLLING_INTERVAL_SECONDS, timeoutInMinutes);
        const outputs = valueOrDefault(result.outputs, []);
        return {
            // Empty outputs are handled explicitly to avoid indexing with -1.
            data: outputs.length === 0 ? "" : outputs[outputs.length - 1],
            simulationOutputs: outputs,
            retrievedContextToEvaluate: undefined,
            messages: result.messages,
            simulationMeta: {
                testRunEntryId: postResult.testRunEntryId,
                sessionId: result.sessionId,
                simulationId: result.simulationId,
                messages: valueOrDefault(result.messages, []),
                trace: result.trace,
            },
            meta: {
                usage: result.usage,
                cost: result.cost,
            },
        };
    };
}

/**
 * Creates an output function that starts a workflow simulation and polls it to completion.
 * @returns Async function `(data) => output`. `data` is the last simulation
 * output, or "" when the simulation returned no outputs. `messages` is always
 * undefined and `simulationMeta.messages` always an empty array for workflows.
 * @throws Propagates service errors and the polling errors (failed / timed out simulation)
 */
function simulationWorkflowIdOutputFunctionClosure(testRunId, workflowId, workspaceId, scenario, TestRunAPIService, simulationConfig, contextToEvaluate, datasetEntryId, input, expectedSteps, timeoutInMinutes = 15) {
    return async (data) => {
        const postResult = await TestRunAPIService.executeSimulationWorkflowForData({
            testRunId,
            workflowId,
            workspaceId,
            datasetEntryId,
            entry: buildSimulationEntry(input, scenario, expectedSteps, contextToEvaluate, data),
            simulationConfig,
        });
        const result = await pollSimulationWorkflowStatus(TestRunAPIService, postResult.workspaceId, postResult.testRunEntryId, SIMULATION_POLLING_INTERVAL_SECONDS, timeoutInMinutes);
        const outputs = valueOrDefault(result.outputs, []);
        return {
            // Empty outputs are handled explicitly to avoid indexing with -1.
            data: outputs.length === 0 ? "" : outputs[outputs.length - 1],
            simulationOutputs: outputs,
            retrievedContextToEvaluate: undefined,
            messages: undefined,
            simulationMeta: {
                testRunEntryId: postResult.testRunEntryId,
                sessionId: result.sessionId,
                simulationId: result.simulationId,
                messages: [],
                trace: result.trace,
                turns: result.turns,
            },
            meta: {
                // Any falsy usage value is normalised to undefined.
                usage: result.usage || undefined,
                cost: result.cost,
            },
        };
    };
}

/**
 * Resolves the persona for a simulation with priority: dataset column > simulation config.
 * A data key equal to "persona" (case-insensitive) with a non-blank value wins.
 * Otherwise a string `simulationConfig.persona` is used as-is, and a persona of
 * type "DATASET_COLUMN" is looked up in `data` under the column named by `payload`.
 * @returns The persona string, or undefined when none can be resolved
 */
function resolveSimulationPersona(simulationConfig, data) {
    const isRecord = Boolean(data) && typeof data === "object";
    if (isRecord) {
        for (const [key, value] of Object.entries(data)) {
            if (key.toLowerCase() !== "persona" || value == null) {
                continue;
            }
            const datasetPersona = String(value).trim();
            if (datasetPersona) {
                return datasetPersona;
            }
        }
    }
    const configured = simulationConfig.persona;
    if (!configured) {
        return undefined;
    }
    if (typeof configured === "string") {
        return configured;
    }
    if (configured.type !== "DATASET_COLUMN") {
        return undefined;
    }
    const columnValue = isRecord ? data[configured.payload] : undefined;
    if (columnValue == null) {
        return undefined;
    }
    return String(columnValue).trim() || undefined;
}

/**
 * Creates an output function that drives a simulation turn by turn, calling the
 * user's `outputFunction` locally for every simulated user message.
 *
 * Each turn asks `executeSimulationLocalExecution` for the next user input. The
 * loop ends when `maxTurns` (default 10) is reached, the service returns a
 * `stopReason`, the service returns no `userInput`, or the configured
 * `stopTrigger` field of the assistant output equals the trigger value.
 *
 * @param logger - Optional. Used for the stop reason and for cleanup failures;
 * a missing logger is tolerated.
 * @returns Async function `(data) => output` whose `simulationMeta` carries the
 * conversation, the last turn and the usage/cost summed over all turns
 * (missing or non-numeric usage/cost fields count as 0).
 * @throws Rethrows the original error after trying to mark the test run entry
 * as "FAILED" (only once a testRunEntryId is known).
 */
function simulationYieldsOutputFunctionClosure(testRunId, workspaceId, simulationConfig, outputFunction, TestRunAPIService, datasetEntryId, input, scenario, expectedSteps, contextToEvaluate, timeoutInMinutes = 15, logger) {
    return async (data) => {
        let testRunEntryId;
        try {
            const maxTurns = valueOrDefault(simulationConfig.maxTurns, 10);
            const conversationHistory = [];
            const simulationOutputs = [];
            let sessionId;
            let simulationId;
            let stopReason;
            let turnNumber = 0;
            // Aggregated usage and cost
            const usageTotals = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
            const costTotals = { input: 0, output: 0, total: 0 };
            const resolvedSimulationConfig = {
                ...simulationConfig,
                persona: resolveSimulationPersona(simulationConfig, data),
            };
            // Turn-by-turn simulation loop
            const simulationStartTime = Date.now();
            while (turnNumber < maxTurns) {
                turnNumber++;
                const isFirstTurn = turnNumber === 1;
                // Call the local-execution endpoint to get the next user message.
                // Entry data is only sent on the first turn, history only afterwards.
                const turnResult = await TestRunAPIService.executeSimulationLocalExecution({
                    testRunId,
                    workspaceId,
                    datasetEntryId: isFirstTurn ? datasetEntryId : undefined,
                    entry: isFirstTurn ? buildSimulationEntry(input, scenario, expectedSteps, contextToEvaluate, data) : undefined,
                    simulationConfig: resolvedSimulationConfig,
                    conversationHistory: isFirstTurn ? undefined : conversationHistory,
                    testRunEntryId,
                });
                // Store testRunEntryId, sessionId, simulationId from first turn
                if (isFirstTurn) {
                    testRunEntryId = turnResult.testRunEntryId;
                    sessionId = turnResult.sessionId;
                    simulationId = turnResult.simulationId;
                }
                // Aggregate usage and cost
                if (turnResult.usage) {
                    usageTotals.promptTokens += numberOrZero(turnResult.usage.promptTokens);
                    usageTotals.completionTokens += numberOrZero(turnResult.usage.completionTokens);
                    usageTotals.totalTokens += numberOrZero(turnResult.usage.totalTokens);
                }
                if (turnResult.cost) {
                    costTotals.input += numberOrZero(turnResult.cost.input);
                    costTotals.output += numberOrZero(turnResult.cost.output);
                    costTotals.total += numberOrZero(turnResult.cost.total);
                }
                // A stopReason from the backend ends the simulation; the reason is logged.
                if (turnResult.stopReason) {
                    stopReason = turnResult.stopReason;
                    logBestEffort(logger, "info", `Simulation stopped: ${stopReason}`);
                    break;
                }
                // If userInput is null/undefined, simulation has ended
                const userInput = turnResult.userInput;
                if (userInput === null || userInput === undefined) {
                    break;
                }
                // Call the user's outputFunction with simulation context
                const assistantOutput = await outputFunction(data, {
                    conversationHistory,
                    currentUserInput: userInput,
                    turnNumber,
                    totalCost: costTotals.total,
                    totalTokens: usageTotals.totalTokens,
                });
                simulationOutputs.push(assistantOutput.data);
                // Add turn to conversation history for next API call
                conversationHistory.push({
                    turn: turnNumber,
                    request: {
                        input: typeof userInput === "object" ? valueOrDefault(userInput["input"], "") : String(userInput),
                    },
                    response: {
                        output: assistantOutput.data,
                        tool_calls: valueOrDefault(assistantOutput.toolCalls, []),
                    },
                });
                // Check stopTrigger
                if (simulationConfig.stopTrigger) {
                    const fieldValue = getNestedFieldValue(assistantOutput, simulationConfig.stopTrigger.field);
                    if (fieldValue === simulationConfig.stopTrigger.value) {
                        break;
                    }
                }
            }
            // Build final output - usage/cost live in simulationMeta for simulation runs
            const totalLatency = Date.now() - simulationStartTime;
            const lastEntry = conversationHistory[conversationHistory.length - 1];
            const lastTurn = lastEntry
                ? {
                    turn: conversationHistory.length,
                    request: lastEntry.request,
                    response: lastEntry.response,
                }
                : undefined;
            return {
                data: simulationOutputs[simulationOutputs.length - 1] || "",
                simulationOutputs,
                simulationMeta: {
                    testRunEntryId,
                    sessionId,
                    simulationId,
                    messages: conversationHistory,
                    lastTurn,
                    ...(stopReason && { stopReason }),
                    usage: {
                        promptTokens: usageTotals.promptTokens,
                        completionTokens: usageTotals.completionTokens,
                        totalTokens: usageTotals.totalTokens,
                        latency: totalLatency,
                    },
                    cost: {
                        input: costTotals.input,
                        output: costTotals.output,
                        total: costTotals.total,
                    },
                },
            };
        }
        catch (error) {
            if (testRunEntryId) {
                try {
                    await TestRunAPIService.updateSimulationStatus(testRunEntryId, "FAILED");
                }
                catch (cleanupError) {
                    // Log but don't mask the original error
                    const msg = `Failed to mark simulation as failed (testRunEntryId: ${testRunEntryId}): ${cleanupError instanceof Error ? cleanupError.message : String(cleanupError)}`;
                    logBestEffort(logger, "error", msg);
                }
            }
            throw error;
        }
    };
}

/**
 * Helper function to get nested field value from an object.
 * @param obj - Value to start from
 * @param fieldPath - Dot separated list of keys, e.g. "meta.done"
 * @returns The value found at the path, or undefined as soon as a segment
 * cannot be followed (current value is not a truthy object, or the key is not `in` it)
 */
function getNestedFieldValue(obj, fieldPath) {
    let current = obj;
    for (const key of fieldPath.split(".")) {
        const canDescend = Boolean(current) && typeof current === "object" && key in current;
        if (!canDescend) {
            return undefined;
        }
        current = current[key];
    }
    return current;
}
//# sourceMappingURL=runUtils.js.map