@salesforce/agents
Client side APIs for working with Salesforce agents
/*
* Copyright (c) 2024, salesforce.com, inc.
* All rights reserved.
* Licensed under the BSD 3-Clause license.
* For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.AgentTester = exports.AgentTestCreateLifecycleStages = void 0;
exports.convertTestResultsToFormat = convertTestResultsToFormat;
exports.normalizeResults = normalizeResults;
exports.humanFriendlyName = humanFriendlyName;
exports.writeTestSpec = writeTestSpec;
exports.generateTestSpecFromAiEvalDefinition = generateTestSpecFromAiEvalDefinition;
const promises_1 = require("node:fs/promises");
const node_path_1 = require("node:path");
const core_1 = require("@salesforce/core");
const kit_1 = require("@salesforce/kit");
const source_deploy_retrieve_1 = require("@salesforce/source-deploy-retrieve");
const yaml_1 = require("yaml");
const fast_xml_parser_1 = require("fast-xml-parser");
const maybe_mock_1 = require("./maybe-mock");
const utils_1 = require("./utils");
/**
* Events emitted during agent test creation for consumers to listen to and keep track of progress.
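*
* Example (illustrative sketch; uses `Lifecycle` from `@salesforce/core`):
*
* `Lifecycle.getInstance().on(AgentTestCreateLifecycleStages.DeployingMetadata, async (status) => console.log(status));`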
*/
exports.AgentTestCreateLifecycleStages = {
CreatingLocalMetadata: 'Creating Local Metadata',
Waiting: 'Waiting for the org to respond',
DeployingMetadata: 'Deploying Metadata',
Done: 'Done',
};
/**
* A service for testing agents using `AiEvaluationDefinition` metadata. Start asynchronous
* test runs, get or poll for test status, and get detailed test results.
*
* **Examples**
*
* Create an instance of the service:
*
* `const agentTester = new AgentTester(connection);`
*
* Start a test run:
*
* `const startResponse = await agentTester.start(aiEvalDef);`
*
* Get the status for a test run:
*
* `const status = await agentTester.status(startResponse.runId);`
*
* Get detailed results for a test run:
*
* `const results = await agentTester.results(startResponse.runId);`
*/
class AgentTester {
connection;
maybeMock;
constructor(connection) {
this.connection = connection;
this.maybeMock = new maybe_mock_1.MaybeMock(connection);
}
/**
* List the AiEvaluationDefinitions available in the org.
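*
* Example (illustrative sketch):
*
* `const definitions = await agentTester.list();`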
*/
async list() {
return this.connection.metadata.list({ type: 'AiEvaluationDefinition' });
}
/**
* Initiates a test run (i.e., AI evaluation).
*
* @param aiEvalDefName - The name of the AI evaluation definition to run.
* @returns Promise that resolves with the response from starting the test.
*/
async start(aiEvalDefName) {
const url = '/einstein/ai-evaluations/runs';
return this.maybeMock.request('POST', url, {
aiEvaluationDefinitionName: aiEvalDefName,
});
}
/**
* Get the status of a test run.
*
* @param {string} jobId - The job ID returned when the test run was started.
* @returns {Promise<AgentTestStatusResponse>} The current status of the test run.
*/
async status(jobId) {
const url = `/einstein/ai-evaluations/runs/${jobId}`;
return this.maybeMock.request('GET', url);
}
/**
* Poll the status of a test run until the tests are complete or the timeout is reached.
*
* @param {string} jobId - The job ID returned when the test run was started.
* @param {Duration} timeout - How long to keep polling before giving up. Defaults to 5 minutes.
* @returns {Promise<AgentTestResultsResponse>} The detailed test results once the run completes.
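*
* Example (illustrative sketch; `Duration` comes from `@salesforce/kit`):
*
* `const results = await agentTester.poll(startResponse.runId, { timeout: Duration.minutes(10) });`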
*/
async poll(jobId, { timeout = kit_1.Duration.minutes(5) } = { timeout: kit_1.Duration.minutes(5) }) {
const frequency = kit_1.env.getNumber('SF_AGENT_TEST_POLLING_FREQUENCY_MS', 1000);
const lifecycle = core_1.Lifecycle.getInstance();
const client = await core_1.PollingClient.create({
poll: async () => {
const statusResponse = await this.status(jobId);
if (statusResponse.status.toLowerCase() !== 'new') {
const resultsResponse = await this.results(jobId);
const totalTestCases = resultsResponse.testCases.length;
const passingTestCases = resultsResponse.testCases.filter((tc) => tc.status.toLowerCase() === 'completed' && tc.testResults.every((r) => r.result === 'PASS')).length;
const failingTestCases = resultsResponse.testCases.filter((tc) => ['error', 'completed'].includes(tc.status.toLowerCase()) &&
tc.testResults.some((r) => r.result === 'FAILURE')).length;
if (resultsResponse.status.toLowerCase() === 'completed') {
await lifecycle.emit('AGENT_TEST_POLLING_EVENT', {
jobId,
status: resultsResponse.status,
totalTestCases,
failingTestCases,
passingTestCases,
});
return { payload: resultsResponse, completed: true };
}
await lifecycle.emit('AGENT_TEST_POLLING_EVENT', {
jobId,
status: resultsResponse.status,
totalTestCases,
failingTestCases,
passingTestCases,
});
}
return { completed: false };
},
frequency: kit_1.Duration.milliseconds(frequency),
timeout,
});
return client.subscribe();
}
/**
* Get detailed test run results.
*
* @param {string} jobId - The job ID returned when the test run was started.
* @returns {Promise<AgentTestResultsResponse>} The detailed, normalized test results.
*/
async results(jobId) {
const url = `/einstein/ai-evaluations/runs/${jobId}/results`;
const results = await this.maybeMock.request('GET', url);
return normalizeResults(results);
}
/**
* Cancel an in-progress test run.
*
* @param {string} jobId - The job ID returned when the test run was started.
* @returns {Promise<{success: boolean}>} Whether the cancel request was accepted.
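*
* Example (illustrative sketch):
*
* `const { success } = await agentTester.cancel(startResponse.runId);`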
*/
async cancel(jobId) {
const url = `/einstein/ai-evaluations/runs/${jobId}/cancel`;
return this.maybeMock.request('POST', url);
}
/**
* Creates and deploys an AiEvaluationDefinition from a specification file.
*
* @param apiName - The API name of the AiEvaluationDefinition to create
* @param specFilePath - The path to the specification file to create the definition from
* @param options - Configuration options for creating the definition
* @param options.outputDir - The directory where the AiEvaluationDefinition file will be written
* @param options.preview - If true, writes the AiEvaluationDefinition file to <api-name>-preview-<timestamp>.xml in the current working directory and does not deploy it
*
* @returns Promise containing:
* - path: The filesystem path to the created AiEvaluationDefinition file
* - contents: The AiEvaluationDefinition contents as a string
* - deployResult: The deployment result (if not in preview mode)
*
* @throws {SfError} When deployment fails
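*
* Example (illustrative sketch; the API name, spec path, and output directory are hypothetical):
*
* `const { path, deployResult } = await agentTester.create('My_Eval', 'specs/my-eval.yaml', { outputDir: 'force-app/main/default/aiEvaluationDefinitions' });`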
*/
async create(apiName, specFilePath, options) {
const parsed = (0, yaml_1.parse)(await (0, promises_1.readFile)(specFilePath, 'utf-8'));
const lifecycle = core_1.Lifecycle.getInstance();
await lifecycle.emit(exports.AgentTestCreateLifecycleStages.CreatingLocalMetadata, {});
const preview = options.preview ?? false;
// outputDir is overridden if preview is true
const outputDir = preview ? process.cwd() : options.outputDir;
const filename = preview
? `${apiName}-preview-${new Date().toISOString()}.xml`
: `${apiName}.aiEvaluationDefinition-meta.xml`;
const definitionPath = (0, node_path_1.join)(outputDir, filename);
const builder = new fast_xml_parser_1.XMLBuilder({
format: true,
attributeNamePrefix: '$',
indentBy: ' ',
ignoreAttributes: false,
});
const xml = builder.build({
AiEvaluationDefinition: {
$xmlns: 'http://soap.sforce.com/2006/04/metadata',
...(parsed.description && { description: parsed.description }),
name: parsed.name,
subjectName: parsed.subjectName,
subjectType: parsed.subjectType,
...(parsed.subjectVersion && { subjectVersion: parsed.subjectVersion }),
testCase: parsed.testCases.map((tc) => ({
expectation: [
{
expectedValue: tc.expectedTopic,
name: 'topic_sequence_match',
},
{
expectedValue: `[${(tc.expectedActions ?? []).map((v) => `"${v}"`).join(',')}]`,
name: 'action_sequence_match',
},
{
expectedValue: tc.expectedOutcome,
name: 'bot_response_rating',
},
],
inputs: {
utterance: tc.utterance,
},
number: parsed.testCases.indexOf(tc) + 1,
})),
},
});
const finalXml = `<?xml version="1.0" encoding="UTF-8"?>\n${xml}`;
await (0, promises_1.mkdir)(outputDir, { recursive: true });
await (0, promises_1.writeFile)(definitionPath, finalXml);
if (preview)
return {
path: definitionPath,
contents: finalXml,
};
const cs = await source_deploy_retrieve_1.ComponentSetBuilder.build({ sourcepath: [definitionPath] });
const deploy = await cs.deploy({ usernameOrConnection: this.connection });
deploy.onUpdate((status) => {
if (status.status === source_deploy_retrieve_1.RequestStatus.Pending) {
void lifecycle.emit(exports.AgentTestCreateLifecycleStages.Waiting, status);
}
else {
void lifecycle.emit(exports.AgentTestCreateLifecycleStages.DeployingMetadata, status);
}
});
deploy.onFinish((result) => {
// Small deploys like this (a single file) can finish without an 'update' event ever firing.
// In onFinish, emit the update event and then the done event so consumers get complete output.
void lifecycle.emit(exports.AgentTestCreateLifecycleStages.DeployingMetadata, result);
void lifecycle.emit(exports.AgentTestCreateLifecycleStages.Done, result);
});
const result = await deploy.pollStatus({ timeout: kit_1.Duration.minutes(10_000), frequency: kit_1.Duration.seconds(1) });
if (!result.response.success) {
throw new core_1.SfError(result.response.errorMessage ?? `Unable to deploy ${result.response.id}`);
}
return { path: definitionPath, contents: finalXml, deployResult: result };
}
}
exports.AgentTester = AgentTester;
/**
* Convert the raw, detailed test results to another format.
*
* @param results The detailed results from a test run.
* @param format The desired format. One of: json, junit, or tap.
* @returns Promise that resolves with the results serialized as a string in the requested format.
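*
* Example (illustrative sketch):
*
* `const junitXml = await convertTestResultsToFormat(results, 'junit');`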
*/
async function convertTestResultsToFormat(results, format) {
switch (format) {
case 'json':
return jsonFormat(results);
case 'junit':
return junitFormat(results);
case 'tap':
return tapFormat(results);
default:
throw new Error(`Unsupported format: ${format}`);
}
}
/**
* Normalizes test results by decoding HTML entities in utterances and test result values.
*
* @param results - The agent test results response object to normalize
* @returns A new AgentTestResultsResponse with decoded HTML entities
*
* @example
* ```
* const results = {
* testCases: [{
* inputs: { utterance: "&quot;hello&quot;" },
* testResults: [{
* actualValue: "&amp;test",
* expectedValue: "&lt;value&gt;"
* }]
* }]
* };
* const normalized = normalizeResults(results);
* ```
*/
function normalizeResults(results) {
return {
...results,
testCases: results.testCases.map((tc) => ({
...tc,
inputs: {
utterance: (0, utils_1.decodeHtmlEntities)(tc.inputs.utterance),
},
testResults: tc.testResults.map((r) => ({
...r,
actualValue: (0, utils_1.decodeHtmlEntities)(r.actualValue),
expectedValue: (0, utils_1.decodeHtmlEntities)(r.expectedValue),
})),
})),
};
}
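/** Serialize the results as pretty-printed JSON. */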
async function jsonFormat(results) {
return Promise.resolve(JSON.stringify(results, null, 2));
}
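/**
* Render the results as JUnit-style XML: a <testsuites> root with one <testsuite> per test
* case and a <failure> entry for each failed expectation.
*/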
async function junitFormat(results) {
const builder = new fast_xml_parser_1.XMLBuilder({
format: true,
attributeNamePrefix: '$',
ignoreAttributes: false,
});
const testCount = results.testCases.length;
const failureCount = results.testCases.filter((tc) => ['error', 'completed'].includes(tc.status.toLowerCase()) && tc.testResults.some((r) => r.result === 'FAILURE')).length;
const time = results.testCases.reduce((acc, tc) => {
if (tc.endTime && tc.startTime) {
return acc + new Date(tc.endTime).getTime() - new Date(tc.startTime).getTime();
}
return acc;
}, 0);
const suites = builder.build({
testsuites: {
$name: results.subjectName,
$tests: testCount,
$failures: failureCount,
$time: time,
property: [
{ $name: 'status', $value: results.status },
{ $name: 'start-time', $value: results.startTime },
{ $name: 'end-time', $value: results.endTime },
],
testsuite: results.testCases.map((testCase) => {
const testCaseTime = testCase.endTime
? new Date(testCase.endTime).getTime() - new Date(testCase.startTime).getTime()
: 0;
return {
$name: testCase.testNumber,
$time: testCaseTime,
$assertions: testCase.testResults.length,
failure: testCase.testResults
.map((r) => {
if (r.result === 'FAILURE') {
return { $message: r.errorMessage ?? 'Unknown error', $name: r.name };
}
})
.filter((f) => f),
};
}),
},
});
return Promise.resolve(`<?xml version="1.0" encoding="UTF-8"?>\n${suites}`.trim());
}
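/**
* Map a raw expectation name (for example, `topic_sequence_match` or `topic_assertion`) to a
* human-friendly label for display.
*/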
function humanFriendlyName(name) {
// topic_sequence_match, action_sequence_match, and bot_response_rating are the old names and have since been renamed
// eventually we can remove them
switch (name) {
case 'topic_sequence_match':
case 'topic_assertion':
return 'Topic';
case 'action_sequence_match':
case 'actions_assertion':
return 'Action';
case 'output_latency_milliseconds':
return 'Output Latency';
case 'instruction_following':
return 'Instruction Following';
case 'bot_response_rating':
case 'output_validation':
return 'Outcome';
default:
return name;
}
}
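/**
* Render the results as TAP output: one line per expectation, plus a YAML diagnostic block
* for each failing expectation.
*/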
async function tapFormat(results) {
const lines = [];
let expectationCount = 0;
for (const testCase of results.testCases) {
for (const result of testCase.testResults) {
const status = result.result === 'PASS' ? 'ok' : 'not ok';
expectationCount++;
lines.push(`${status} ${expectationCount} ${testCase.testNumber}.${result.name}`);
if (status === 'not ok') {
lines.push(' ---');
lines.push(` message: ${result.errorMessage ?? 'Unknown error'}`);
lines.push(` expectation: ${result.name}`);
lines.push(` actual: ${result.actualValue}`);
lines.push(` expected: ${result.expectedValue}`);
lines.push(' ...');
}
}
}
return Promise.resolve(`TAP version 14\n1..${expectationCount}\n${lines.join('\n')}`);
}
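/**
* Parse a JSON-array string (for example, '["action1","action2"]') into a string array,
* returning an empty array for missing or unparsable input.
*/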
function transformStringToArray(str) {
try {
if (!str)
return [];
// Trim surrounding whitespace before parsing; JSON.parse already tolerates whitespace
// between tokens, and stripping whitespace inside quoted values would corrupt them.
return JSON.parse(str.trim());
}
catch {
return [];
}
}
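/**
* Wrap a value in an array if it is not one already; the XML parser returns a single object
* (rather than an array) when an element appears only once.
*/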
function castArray(value) {
return Array.isArray(value) ? value : [value];
}
/**
* Generate a test specification file in YAML format.
* This function takes a test specification object, cleans it by removing undefined and empty string values,
* converts it to YAML format, and writes it to the specified output file.
*
* @param spec - The test specification object to be converted to YAML.
* @param outputFile - The file path where the YAML output should be written.
* @throws {Error} - May throw an error if file operations fail.
* @returns A Promise that resolves when the file has been written.
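*
* Example (illustrative sketch; the spec values and output path are hypothetical):
*
* `await writeTestSpec({ name: 'My_Eval', subjectType: 'AGENT', subjectName: 'My_Agent', testCases: [{ utterance: 'Hi', expectedTopic: 'Greeting', expectedActions: [], expectedOutcome: 'Greets the user' }] }, 'specs/my-eval.yaml');`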
*/
async function writeTestSpec(spec, outputFile) {
// strip out undefined values and empty strings
const clean = Object.entries(spec).reduce((acc, [key, value]) => {
if (value !== undefined && value !== '')
return { ...acc, [key]: value };
return acc;
}, {});
const yml = (0, yaml_1.stringify)(clean, undefined, {
minContentWidth: 0,
lineWidth: 0,
});
await (0, promises_1.mkdir)((0, node_path_1.dirname)(outputFile), { recursive: true });
await (0, promises_1.writeFile)(outputFile, yml);
}
/**
* Generates a TestSpec object from an AI Evaluation Definition XML file.
*
* @param path - The file path to the AI Evaluation Definition XML file.
* @returns Promise that resolves to a TestSpec object containing the parsed evaluation definition data.
* @description Reads and parses an XML file containing AIEvaluationDefinition, converting it into a structured TestSpec format.
*
* @throws {Error} If the file cannot be read or parsed.
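*
* Example (illustrative sketch; the path is hypothetical):
*
* `const spec = await generateTestSpecFromAiEvalDefinition('force-app/main/default/aiEvaluationDefinitions/My_Eval.aiEvaluationDefinition-meta.xml');`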
*/
async function generateTestSpecFromAiEvalDefinition(path) {
const xml = await (0, promises_1.readFile)(path, 'utf-8');
const parser = new fast_xml_parser_1.XMLParser();
const parsed = parser.parse(xml);
return {
name: parsed.AiEvaluationDefinition.name,
description: parsed.AiEvaluationDefinition.description,
subjectType: parsed.AiEvaluationDefinition.subjectType,
subjectName: parsed.AiEvaluationDefinition.subjectName,
subjectVersion: parsed.AiEvaluationDefinition.subjectVersion,
testCases: castArray(parsed.AiEvaluationDefinition.testCase).map((tc) => {
const expectations = castArray(tc.expectation);
return {
utterance: tc.inputs.utterance,
// TODO: remove old names once removed in 258 (topic_sequence_match, action_sequence_match, bot_response_rating)
expectedTopic: expectations.find((e) => e.name === 'topic_sequence_match' || e.name === 'topic_assertion')
?.expectedValue,
expectedActions: transformStringToArray(expectations.find((e) => e.name === 'action_sequence_match' || e.name === 'actions_assertion')?.expectedValue),
expectedOutcome: expectations.find((e) => e.name === 'bot_response_rating' || e.name === 'output_validation')
?.expectedValue,
};
}),
};
}
//# sourceMappingURL=agentTester.js.map