@salesforce/agents
Client side APIs for working with Salesforce agents
/*
* Copyright (c) 2024, salesforce.com, inc.
* All rights reserved.
* Licensed under the BSD 3-Clause license.
* For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.AgentTester = exports.AgentTestCreateLifecycleStages = void 0;
exports.convertTestResultsToFormat = convertTestResultsToFormat;
exports.normalizeResults = normalizeResults;
exports.humanFriendlyName = humanFriendlyName;
exports.writeTestSpec = writeTestSpec;
exports.generateTestSpecFromAiEvalDefinition = generateTestSpecFromAiEvalDefinition;
const promises_1 = require("node:fs/promises");
const node_path_1 = require("node:path");
const core_1 = require("@salesforce/core");
const kit_1 = require("@salesforce/kit");
const source_deploy_retrieve_1 = require("@salesforce/source-deploy-retrieve");
const yaml_1 = require("yaml");
const fast_xml_parser_1 = require("fast-xml-parser");
const maybe_mock_1 = require("./maybe-mock");
const utils_1 = require("./utils");
/**
* Events emitted during agent test creation for consumers to listen to and keep track of progress.
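*
* Example (illustrative sketch; uses `Lifecycle` from `@salesforce/core`):
*
* `Lifecycle.getInstance().on(AgentTestCreateLifecycleStages.DeployingMetadata, async (status) => console.log(status));`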
*/
exports.AgentTestCreateLifecycleStages = {
CreatingLocalMetadata: 'Creating Local Metadata',
Waiting: 'Waiting for the org to respond',
DeployingMetadata: 'Deploying Metadata',
Done: 'Done',
};
/**
* A service for testing agents using `AiEvaluationDefinition` metadata. Start asynchronous
* test runs, get or poll for test status, and get detailed test results.
*
* **Examples**
*
* Create an instance of the service:
*
* `const agentTester = new AgentTester(connection);`
*
* Start a test run:
*
* `const startResponse = await agentTester.start(aiEvalDef);`
*
* Get the status for a test run:
*
* `const status = await agentTester.status(startResponse.runId);`
*
* Get detailed results for a test run:
*
* `const results = await agentTester.results(startResponse.runId);`
*/
class AgentTester {
connection;
maybeMock;
constructor(connection) {
this.connection = connection;
this.maybeMock = new maybe_mock_1.MaybeMock(connection);
}
/**
* List the AiEvaluationDefinitions available in the org.
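*
* Example (illustrative sketch):
*
* `const definitions = await agentTester.list();`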
*/
async list() {
return this.connection.metadata.list({ type: 'AiEvaluationDefinition' });
}
/**
* Initiates a test run (i.e., AI evaluation).
*
* @param aiEvalDefName - The name of the AI evaluation definition to run.
* @returns Promise that resolves with the response from starting the test.
*/
async start(aiEvalDefName) {
const url = '/einstein/ai-evaluations/runs';
return this.maybeMock.request('POST', url, {
aiEvaluationDefinitionName: aiEvalDefName,
});
}
/**
* Get the status of a test run.
*
* @param {string} jobId - The job ID returned when the test run was started.
* @returns {Promise<AgentTestStatusResponse>} The current status of the test run.
*/
async status(jobId) {
const url = `/einstein/ai-evaluations/runs/${jobId}`;
return this.maybeMock.request('GET', url);
}
/**
* Poll the status of a test run until the tests are complete or the timeout is reached.
*
* @param {string} jobId - The job ID returned when the test run was started.
* @param {Duration} timeout - How long to keep polling before giving up. Defaults to 5 minutes.
* @returns {Promise<AgentTestResultsResponse>} The detailed test results once the run completes.
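*
* Example (illustrative sketch; `Duration` comes from `@salesforce/kit`):
*
* `const results = await agentTester.poll(startResponse.runId, { timeout: Duration.minutes(10) });`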
*/
async poll(jobId, { timeout = kit_1.Duration.minutes(5) } = { timeout: kit_1.Duration.minutes(5) }) {
const frequency = kit_1.env.getNumber('SF_AGENT_TEST_POLLING_FREQUENCY_MS', 1000);
const lifecycle = core_1.Lifecycle.getInstance();
const client = await core_1.PollingClient.create({
poll: async () => {
const statusResponse = await this.status(jobId);
if (statusResponse.status.toLowerCase() !== 'new') {
const resultsResponse = await this.results(jobId);
const totalTestCases = resultsResponse.testCases.length;
const passingTestCases = resultsResponse.testCases.filter((tc) => tc.status.toLowerCase() === 'completed' && tc.testResults.every((r) => r.result === 'PASS')).length;
const failingTestCases = resultsResponse.testCases.filter((tc) => ['error', 'completed'].includes(tc.status.toLowerCase()) &&
tc.testResults.some((r) => r.result === 'FAILURE')).length;
if (resultsResponse.status.toLowerCase() === 'completed') {
await lifecycle.emit('AGENT_TEST_POLLING_EVENT', {
jobId,
status: resultsResponse.status,
totalTestCases,
failingTestCases,
passingTestCases,
});
return { payload: resultsResponse, completed: true };
}
await lifecycle.emit('AGENT_TEST_POLLING_EVENT', {
jobId,
status: resultsResponse.status,
totalTestCases,
failingTestCases,
passingTestCases,
});
}
return { completed: false };
},
frequency: kit_1.Duration.milliseconds(frequency),
timeout,
});
return client.subscribe();
}
/**
* Get detailed test run results.
*
* @param {string} jobId - The job ID returned when the test run was started.
* @returns {Promise<AgentTestResultsResponse>} The detailed, normalized test results.
*/
async results(jobId) {
const url = `/einstein/ai-evaluations/runs/${jobId}/results`;
const results = await this.maybeMock.request('GET', url);
return normalizeResults(results);
}
/**
* Cancel an in-progress test run.
*
* @param {string} jobId - The job ID returned when the test run was started.
* @returns {Promise<{success: boolean}>} Whether the cancel request was accepted.
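*
* Example (illustrative sketch):
*
* `const { success } = await agentTester.cancel(startResponse.runId);`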
*/
async cancel(jobId) {
const url = `/einstein/ai-evaluations/runs/${jobId}/cancel`;
return this.maybeMock.request('POST', url);
}
/**
* Creates and deploys an AiEvaluationDefinition from a specification file.
*
* @param apiName - The API name of the AiEvaluationDefinition to create
* @param specFilePath - The path to the specification file to create the definition from
* @param options - Configuration options for creating the definition
* @param options.outputDir - The directory where the AiEvaluationDefinition file will be written
* @param options.preview - If true, writes the AiEvaluationDefinition file to <api-name>-preview-<timestamp>.xml in the current working directory and does not deploy it
*
* @returns Promise containing:
* - path: The filesystem path to the created AiEvaluationDefinition file
* - contents: The AiEvaluationDefinition contents as a string
* - deployResult: The deployment result (if not in preview mode)
*
* @throws {SfError} When deployment fails
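*
* Example (illustrative sketch; the API name, spec path, and output directory are hypothetical):
*
* `const { path, deployResult } = await agentTester.create('My_Eval', 'specs/my-eval.yaml', { outputDir: 'force-app/main/default/aiEvaluationDefinitions' });`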
*/
async create(apiName, specFilePath, options) {
const parsed = (0, yaml_1.parse)(await (0, promises_1.readFile)(specFilePath, 'utf-8'));
const lifecycle = core_1.Lifecycle.getInstance();
await lifecycle.emit(exports.AgentTestCreateLifecycleStages.CreatingLocalMetadata, {});
const preview = options.preview ?? false;
// outputDir is overridden if preview is true
const outputDir = preview ? process.cwd() : options.outputDir;
const filename = preview
? `${apiName}-preview-${new Date().toISOString()}.xml`
: `${apiName}.aiEvaluationDefinition-meta.xml`;
const definitionPath = (0, node_path_1.join)(outputDir, filename);
const builder = new fast_xml_parser_1.XMLBuilder({
format: true,
attributeNamePrefix: '$',
indentBy: ' ',
ignoreAttributes: false,
});
const xml = builder.build({
AiEvaluationDefinition: {
$xmlns: 'http://soap.sforce.com/2006/04/metadata',
...(parsed.description && { description: parsed.description }),
name: parsed.name,
subjectName: parsed.subjectName,
subjectType: parsed.subjectType,
...(parsed.subjectVersion && { subjectVersion: parsed.subjectVersion }),
testCase: parsed.testCases.map((tc) => ({
expectation: [
{
expectedValue: tc.expectedTopic,
name: 'topic_sequence_match',
},
{
expectedValue: `[${(tc.expectedActions ?? []).map((v) => `"${v}"`).join(',')}]`,
name: 'action_sequence_match',
},
{
expectedValue: tc.expectedOutcome,
name: 'bot_response_rating',
},
],
inputs: {
utterance: tc.utterance,
},
number: parsed.testCases.indexOf(tc) + 1,
})),
},
});
const finalXml = `<?xml version="1.0" encoding="UTF-8"?>\n${xml}`;
await (0, promises_1.mkdir)(outputDir, { recursive: true });
await (0, promises_1.writeFile)(definitionPath, finalXml);
if (preview)
return {
path: definitionPath,
contents: finalXml,
};
const cs = await source_deploy_retrieve_1.ComponentSetBuilder.build({ sourcepath: [definitionPath] });
const deploy = await cs.deploy({ usernameOrConnection: this.connection });
deploy.onUpdate((status) => {
if (status.status === source_deploy_retrieve_1.RequestStatus.Pending) {
void lifecycle.emit(exports.AgentTestCreateLifecycleStages.Waiting, status);
}
else {
void lifecycle.emit(exports.AgentTestCreateLifecycleStages.DeployingMetadata, status);
}
});
deploy.onFinish((result) => {
// Small deploys like this (a single file) can finish without an 'update' event ever firing.
// In onFinish, emit the update event and then the done event so consumers get complete output.
void lifecycle.emit(exports.AgentTestCreateLifecycleStages.DeployingMetadata, result);
void lifecycle.emit(exports.AgentTestCreateLifecycleStages.Done, result);
});
const result = await deploy.pollStatus({ timeout: kit_1.Duration.minutes(10_000), frequency: kit_1.Duration.seconds(1) });
if (!result.response.success) {
throw new core_1.SfError(result.response.errorMessage ?? `Unable to deploy ${result.response.id}`);
}
return { path: definitionPath, contents: finalXml, deployResult: result };
}
}
exports.AgentTester = AgentTester;
/**
* Convert the raw, detailed test results to another format.
*
* @param results The detailed results from a test run.
* @param format The desired format. One of: json, junit, or tap.
* @returns Promise that resolves with the results serialized as a string in the requested format.
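*
* Example (illustrative sketch):
*
* `const junitXml = await convertTestResultsToFormat(results, 'junit');`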
*/
async function convertTestResultsToFormat(results, format) {
switch (format) {
case 'json':
return jsonFormat(results);
case 'junit':
return junitFormat(results);
case 'tap':
return tapFormat(results);
default:
throw new Error(`Unsupported format: ${format}`);
}
}
/**
* Normalizes test results by decoding HTML entities in utterances and test result values.
*
* @param results - The agent test results response object to normalize
* @returns A new AgentTestResultsResponse with decoded HTML entities
*
* @example
* ```
* const results = {
* testCases: [{
* inputs: { utterance: "&quot;hello&quot;" },
* testResults: [{
* actualValue: "&amp;test",
* expectedValue: "&lt;value&gt;"
* }]
* }]
* };
* const normalized = normalizeResults(results);
* ```
*/
function normalizeResults(results) {
return {
...results,
testCases: results.testCases.map((tc) => ({
...tc,
inputs: {
utterance: (0, utils_1.decodeHtmlEntities)(tc.inputs.utterance),
},
testResults: tc.testResults.map((r) => ({
...r,
actualValue: (0, utils_1.decodeHtmlEntities)(r.actualValue),
expectedValue: (0, utils_1.decodeHtmlEntities)(r.expectedValue),
})),
})),
};
}
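/** Serialize the results as pretty-printed JSON. */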
async function jsonFormat(results) {
return Promise.resolve(JSON.stringify(results, null, 2));
}
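/**
* Render the results as JUnit-style XML: a <testsuites> root with one <testsuite> per test
* case and a <failure> entry for each failed expectation.
*/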
async function junitFormat(results) {
const builder = new fast_xml_parser_1.XMLBuilder({
format: true,
attributeNamePrefix: '$',
ignoreAttributes: false,
});
const testCount = results.testCases.length;
const failureCount = results.testCases.filter((tc) => ['error', 'completed'].includes(tc.status.toLowerCase()) && tc.testResults.some((r) => r.result === 'FAILURE')).length;
const time = results.testCases.reduce((acc, tc) => {
if (tc.endTime && tc.startTime) {
return acc + new Date(tc.endTime).getTime() - new Date(tc.startTime).getTime();
}
return acc;
}, 0);
const suites = builder.build({
testsuites: {
$name: results.subjectName,
$tests: testCount,
$failures: failureCount,
$time: time,
property: [
{ $name: 'status', $value: results.status },
{ $name: 'start-time', $value: results.startTime },
{ $name: 'end-time', $value: results.endTime },
],
testsuite: results.testCases.map((testCase) => {
const testCaseTime = testCase.endTime
? new Date(testCase.endTime).getTime() - new Date(testCase.startTime).getTime()
: 0;
return {
$name: testCase.testNumber,
$time: testCaseTime,
$assertions: testCase.testResults.length,
failure: testCase.testResults
.map((r) => {
if (r.result === 'FAILURE') {
return { $message: r.errorMessage ?? 'Unknown error', $name: r.name };
}
})
.filter((f) => f),
};
}),
},
});
return Promise.resolve(`<?xml version="1.0" encoding="UTF-8"?>\n${suites}`.trim());
}
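/**
* Map a raw expectation name (for example, `topic_sequence_match` or `topic_assertion`) to a
* human-friendly label for display.
*/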
function humanFriendlyName(name) {
// topic_sequence_match, action_sequence_match, and bot_response_rating are the old names and have since been renamed
// eventually we can remove them
switch (name) {
case 'topic_sequence_match':
case 'topic_assertion':
return 'Topic';
case 'action_sequence_match':
case 'actions_assertion':
return 'Action';
case 'output_latency_milliseconds':
return 'Output Latency';
case 'instruction_following':
return 'Instruction Following';
case 'bot_response_rating':
case 'output_validation':
return 'Outcome';
default:
return name;
}
}
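/**
* Render the results as TAP output: one line per expectation, plus a YAML diagnostic block
* for each failing expectation.
*/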
async function tapFormat(results) {
const lines = [];
let expectationCount = 0;
for (const testCase of results.testCases) {
for (const result of testCase.testResults) {
const status = result.result === 'PASS' ? 'ok' : 'not ok';
expectationCount++;
lines.push(`${status} ${expectationCount} ${testCase.testNumber}.${result.name}`);
if (status === 'not ok') {
lines.push(' ---');
lines.push(` message: ${result.errorMessage ?? 'Unknown error'}`);
lines.push(` expectation: ${result.name}`);
lines.push(` actual: ${result.actualValue}`);
lines.push(` expected: ${result.expectedValue}`);
lines.push(' ...');
}
}
}
return Promise.resolve(`TAP version 14\n1..${expectationCount}\n${lines.join('\n')}`);
}
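/**
* Parse a JSON-array string (for example, '["action1","action2"]') into a string array,
* returning an empty array for missing or unparsable input.
*/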
function transformStringToArray(str) {
try {
if (!str)
return [];
// Trim surrounding whitespace before parsing; JSON.parse already tolerates whitespace
// between tokens, and stripping whitespace inside quoted values would corrupt them.
return JSON.parse(str.trim());
}
catch {
return [];
}
}
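/**
* Wrap a value in an array if it is not one already; the XML parser returns a single object
* (rather than an array) when an element appears only once.
*/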
function castArray(value) {
return Array.isArray(value) ? value : [value];
}
/**
* Generate a test specification file in YAML format.
* This function takes a test specification object, cleans it by removing undefined and empty string values,
* converts it to YAML format, and writes it to the specified output file.
*
* @param spec - The test specification object to be converted to YAML.
* @param outputFile - The file path where the YAML output should be written.
* @throws {Error} - May throw an error if file operations fail.
* @returns A Promise that resolves when the file has been written.
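*
* Example (illustrative sketch; the spec values and output path are hypothetical):
*
* `await writeTestSpec({ name: 'My_Eval', subjectType: 'AGENT', subjectName: 'My_Agent', testCases: [{ utterance: 'Hi', expectedTopic: 'Greeting', expectedActions: [], expectedOutcome: 'Greets the user' }] }, 'specs/my-eval.yaml');`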
*/
async function writeTestSpec(spec, outputFile) {
// strip out undefined values and empty strings
const clean = Object.entries(spec).reduce((acc, [key, value]) => {
if (value !== undefined && value !== '')
return { ...acc, [key]: value };
return acc;
}, {});
const yml = (0, yaml_1.stringify)(clean, undefined, {
minContentWidth: 0,
lineWidth: 0,
});
await (0, promises_1.mkdir)((0, node_path_1.dirname)(outputFile), { recursive: true });
await (0, promises_1.writeFile)(outputFile, yml);
}
/**
* Generates a TestSpec object from an AI Evaluation Definition XML file.
*
* @param path - The file path to the AI Evaluation Definition XML file.
* @returns Promise that resolves to a TestSpec object containing the parsed evaluation definition data.
* @description Reads and parses an XML file containing AIEvaluationDefinition, converting it into a structured TestSpec format.
*
* @throws {Error} If the file cannot be read or parsed.
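*
* Example (illustrative sketch; the path is hypothetical):
*
* `const spec = await generateTestSpecFromAiEvalDefinition('force-app/main/default/aiEvaluationDefinitions/My_Eval.aiEvaluationDefinition-meta.xml');`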
*/
async function generateTestSpecFromAiEvalDefinition(path) {
const xml = await (0, promises_1.readFile)(path, 'utf-8');
const parser = new fast_xml_parser_1.XMLParser();
const parsed = parser.parse(xml);
return {
name: parsed.AiEvaluationDefinition.name,
description: parsed.AiEvaluationDefinition.description,
subjectType: parsed.AiEvaluationDefinition.subjectType,
subjectName: parsed.AiEvaluationDefinition.subjectName,
subjectVersion: parsed.AiEvaluationDefinition.subjectVersion,
testCases: castArray(parsed.AiEvaluationDefinition.testCase).map((tc) => {
const expectations = castArray(tc.expectation);
return {
utterance: tc.inputs.utterance,
// TODO: remove old names once removed in 258 (topic_sequence_match, action_sequence_match, bot_response_rating)
expectedTopic: expectations.find((e) => e.name === 'topic_sequence_match' || e.name === 'topic_assertion')
?.expectedValue,
expectedActions: transformStringToArray(expectations.find((e) => e.name === 'action_sequence_match' || e.name === 'actions_assertion')?.expectedValue),
expectedOutcome: expectations.find((e) => e.name === 'bot_response_rating' || e.name === 'output_validation')
?.expectedValue,
};
}),
};
}
//# sourceMappingURL=agentTester.js.map