UNPKG

@twilio-alpha/assistants-eval

Version:

promptfoo extension for writing AI evaluations for Twilio AI Assistants

180 lines 7.2 kB
import pino from 'pino';
import { providers, } from 'promptfoo';
import TwilioProvider from './twilio.js';

/** Marker that either participant can emit to end the simulated conversation. */
const STOP_TOKEN = '###STOP###';

/** Provider id loaded when no provider options are configured. */
const DEFAULT_PROVIDER_ID = 'openai:chat:gpt-4o-mini';

/** Provider id that is constructed directly instead of going through promptfoo's loader. */
const TWILIO_PROVIDER_ID = 'package:@twilio-alpha/assistants-eval:TwilioProvider';

/** Behavioural rules appended to the simulated user's system prompt, one per line. */
const USER_SIMULATION_RULES = [
    "- Just generate one line at a time to simulate the user's message.",
    '- Do not give away all the instruction at once. Only provide the information that is necessary for the current step.',
    '- Do not hallucinate information that is not provided in the instruction. For example, if the agent asks for the order id but it is not mentioned in the instruction, do not make up an order id, just say you do not remember or have it.',
    `- If the instruction goal is satisfied, generate '${STOP_TOKEN}' as a standalone message without anything else to end the conversation.`,
    '- Do not repeat the exact instruction in the conversation. Instead, use your own words to convey the same information.',
    '- Try to make the conversation as natural as possible, and stick to the personalities in the instruction.',
];

/**
 * Builds the system prompt given to the model that plays the user.
 *
 * @param {string} [instructions] - Scenario the simulated user should follow.
 *   When falsy, the "Instruction:" section is left out.
 * @returns {string} The prompt text, one rule per line.
 */
function getUserSystemPrompt(instructions) {
    const instructionSection = instructions ? `\n\nInstruction: ${instructions}\n` : '';
    // The prompt previously ended with a stray `"""` that was sent verbatim to the model; it is dropped here.
    return [
        `You are a user interacting with an agent.${instructionSection}`,
        'Rules:',
        ...USER_SIMULATION_RULES,
    ].join('\n');
}

/**
 * Resolves the turn limit for one conversation.
 *
 * @param {unknown} override - Value of the `maxTurns` test variable, if any.
 * @param {number} fallback - Limit configured on the provider.
 * @returns {number} `override` as a number when it is a truthy non-object that
 *   parses as a base-10 integer (or already is a number); otherwise `fallback`.
 */
function resolveMaxTurns(override, fallback) {
    if (!override || typeof override === 'object') {
        return fallback;
    }
    const parsed = typeof override === 'number' ? override : Number.parseInt(override, 10);
    return Number.isNaN(parsed) ? fallback : parsed;
}

/**
 * Creates the context object handed to a wrapped provider.
 *
 * @param {object} vars - Test variables to forward (copied shallowly).
 * @returns {{vars: object, prompt: {label: string, raw: string}}} A fresh context.
 */
function buildProviderContext(vars) {
    return {
        vars: { ...vars },
        prompt: { label: '{{prompt}}', raw: '{{prompt}}' },
    };
}

/**
 * Returns the content of the message a turn just appended, failing loudly when
 * the provider produced nothing.
 *
 * @param {{messages: Array<{role: string, content: unknown}>, error?: unknown}} turn -
 *   Result of `sendMessageToUser` / `sendMessageToAgent`.
 * @param {string} speaker - Label used in the error message ("User" or "Agent").
 * @returns {unknown} The last message's content.
 * @throws {Error} When the content is `null` or `undefined`; the provider's
 *   `error`, if one was reported, is included in the message.
 */
function requireTurnContent(turn, speaker) {
    const lastMessage = turn.messages[turn.messages.length - 1];
    if (lastMessage.content == null) {
        const reason = turn.error ? `: ${turn.error}` : '';
        throw new Error(`${speaker} provider returned no output${reason}`);
    }
    return lastMessage.content;
}

/**
 * Tells whether a message asks to end the conversation.
 *
 * @param {unknown} content - Message content; only strings can carry the stop token.
 * @returns {boolean} True when `content` is a string containing the stop token.
 */
function hasStopToken(content) {
    return typeof content === 'string' && content.includes(STOP_TOKEN);
}

/**
 * promptfoo provider that runs a multi-turn conversation between a simulated
 * user (one provider) and an agent (another provider) and returns the transcript.
 */
export class AgentProvider {
    identifier;
    maxTurns;
    userProviderOptions;
    agentProviderOptions;
    userProvider;
    agentProvider;
    logger;

    /**
     * @param {object} [options]
     * @param {string} [options.id] - Identifier reported by `id()`.
     * @param {string} [options.label] - Used as identifier when `id` is nullish.
     * @param {object} [options.config]
     * @param {string|object} [options.config.userProvider] - Options for the provider playing the user.
     * @param {string|object} [options.config.agentProvider] - Options for the provider playing the agent.
     * @param {number} [options.config.maxTurns] - Turn limit; defaults to 10.
     */
    constructor({ id, label, config } = {}) {
        // `config` may be omitted entirely; treat that the same as an empty config.
        const { agentProvider, userProvider, maxTurns } = config ?? {};
        this.identifier = id ?? label ?? 'agent-provider';
        this.userProviderOptions = userProvider;
        this.agentProviderOptions = agentProvider;
        this.maxTurns = maxTurns ?? 10;
        this.logger = pino({
            level: process.env.LOG_LEVEL || 'info',
        }).child({
            module: this.constructor.name,
        });
    }

    /** The loaded agent provider, or `undefined` before `ensureProviders()` has run. */
    get agentProviderInstance() {
        return this.agentProvider;
    }

    /**
     * Instantiates a provider from its options.
     *
     * @param {string|object|null|undefined} options - A provider id, an options
     *   object with an `id`, or nothing to get the default provider.
     * @returns {Promise<object>} The provider instance.
     * @throws {Error} When options are given without an `id`.
     */
    async loadProvider(options) {
        if (options == null) {
            return providers.loadApiProvider(DEFAULT_PROVIDER_ID);
        }
        const providerOptions = typeof options === 'string' ? { id: options } : options;
        if (!providerOptions.id) {
            throw new Error('Provider ID is required');
        }
        // need to do this because otherwise we might end up in an infinite loop for TwilioAgentProvider
        if (providerOptions.id === TWILIO_PROVIDER_ID) {
            return new TwilioProvider(providerOptions);
        }
        return providers.loadApiProvider(providerOptions.id, {
            options: providerOptions,
        });
    }

    /** Loads the user and agent providers if they have not been loaded yet. */
    async ensureProviders() {
        if (!this.userProvider) {
            this.userProvider = await this.loadProvider(this.userProviderOptions);
        }
        if (!this.agentProvider) {
            this.agentProvider = await this.loadProvider(this.agentProviderOptions);
        }
    }

    /** @returns {string} The identifier chosen in the constructor. */
    id() {
        return this.identifier;
    }

    /**
     * Asks the user provider for the next user message.
     *
     * @param {Array<{role: string, content: unknown}>} messages - Conversation so far.
     * @param {object} context - Context forwarded to the provider's `callApi`.
     * @param {string} systemPrompt - System prompt for the simulated user.
     * @returns {Promise<{messages: Array, metadata: unknown, error: unknown}>}
     *   `messages` with the new user message appended, plus the provider's
     *   `metadata` and `error` fields as returned.
     * @throws {Error} When the user provider has not been loaded.
     */
    async sendMessageToUser(messages, context, systemPrompt) {
        if (!this.userProvider) {
            throw new Error('User provider not loaded');
        }
        // Since we are sending this to another AI that sees itself as the `assistant` we need to flip the roles of the messages
        // so any user message becomes an assistant message and vice versa
        const flippedMessages = messages.map(({ role, content }) => ({
            role: role === 'user' ? 'assistant' : 'user',
            content,
        }));
        const payload = [{ role: 'system', content: systemPrompt }, ...flippedMessages];
        const response = await this.userProvider.callApi(JSON.stringify(payload), context);
        this.logger.debug('User: %s', response.output);
        return {
            messages: [...messages, { role: 'user', content: response.output }],
            metadata: response.metadata,
            error: response.error,
        };
    }

    /**
     * Asks the agent provider for its reply to the conversation so far.
     *
     * @param {Array<{role: string, content: unknown}>} messages - Conversation so far.
     * @param {object} context - Context forwarded to the provider's `callApi`.
     * @param {string} [systemPrompt] - Optional system prompt placed before the messages.
     * @returns {Promise<{messages: Array, metadata: unknown, error: unknown}>}
     *   `messages` with the new assistant message appended, plus the provider's
     *   `metadata` and `error` fields as returned.
     * @throws {Error} When the agent provider has not been loaded.
     */
    async sendMessageToAgent(messages, context, systemPrompt) {
        if (!this.agentProvider) {
            throw new Error('Agent provider not loaded');
        }
        const systemMessages = systemPrompt ? [{ role: 'system', content: systemPrompt }] : [];
        const payload = [...systemMessages, ...messages];
        const response = await this.agentProvider.callApi(JSON.stringify(payload), context);
        this.logger.debug('Assistant: %s', response.output);
        return {
            messages: [...messages, { role: 'assistant', content: response.output }],
            metadata: response.metadata,
            error: response.error,
        };
    }

    /**
     * Runs the simulated conversation: user and agent alternate until either
     * emits the stop token or the turn limit is reached.
     *
     * Reads two special test variables from `context.vars`: `maxTurns`
     * (overrides the configured limit) and `agentPrompt` (system prompt for the
     * agent, used only when it is a string). Neither is forwarded to the
     * wrapped providers; all other variables are.
     *
     * @param {string} prompt - Instructions for the simulated user.
     * @param {object} [context] - promptfoo call context.
     * @param {object} [_callApiOptions] - Unused.
     * @returns {Promise<{output: string, raw: string, metadata: object}>}
     *   `output` is the transcript with turns separated by `\n---\n`, `raw` is
     *   the JSON-encoded message list, `metadata` is the shallow merge of every
     *   turn's metadata (later turns win).
     * @throws {Error} When a provider cannot be loaded or returns no output.
     */
    async callApi(prompt, context, _callApiOptions) {
        await this.ensureProviders();
        if (!this.userProvider || !this.agentProvider) {
            throw new Error('Providers not loaded');
        }
        const { maxTurns: varMaxTurns, agentPrompt, ...redactedVars } = context?.vars ?? {};
        const maxTurns = resolveMaxTurns(varMaxTurns, this.maxTurns);
        const userSystemPrompt = getUserSystemPrompt(prompt);
        const agentSystemPrompt = typeof agentPrompt === 'string' ? agentPrompt : undefined;
        // Each side gets its own context object so neither provider sees the other's mutations.
        const userContext = buildProviderContext(redactedVars);
        const agentContext = buildProviderContext(redactedVars);

        let messages = [];
        let metadata = {};
        // Turns are inherently sequential: each request depends on the previous reply.
        for (let turn = 0; turn < maxTurns; turn += 1) {
            // eslint-disable-next-line no-await-in-loop
            const userTurn = await this.sendMessageToUser(messages, userContext, userSystemPrompt);
            messages = userTurn.messages;
            metadata = { ...metadata, ...userTurn.metadata };
            if (hasStopToken(requireTurnContent(userTurn, 'User'))) {
                break;
            }
            // eslint-disable-next-line no-await-in-loop
            const agentTurn = await this.sendMessageToAgent(messages, agentContext, agentSystemPrompt);
            messages = agentTurn.messages;
            metadata = { ...metadata, ...agentTurn.metadata };
            if (hasStopToken(requireTurnContent(agentTurn, 'Agent'))) {
                break;
            }
        }

        const transcript = messages
            .map((message) => `${message.role === 'assistant' ? 'Assistant' : 'User'}: ${message.content}`)
            .join('\n---\n');
        return {
            output: transcript,
            raw: JSON.stringify(messages),
            metadata,
        };
    }

    /** @returns {string} Fixed display name of this provider. */
    toString() {
        return 'AgentProvider';
    }
}
export default AgentProvider;
//# sourceMappingURL=agent.js.map