/*
 * @twilio-alpha/assistants-eval
 * Version:
 * promptfoo extension for writing AI evaluations for Twilio AI Assistants
 * 180 lines • 7.2 kB
 * JavaScript
 */
import pino from 'pino';
import { providers, } from 'promptfoo';
import TwilioProvider from './twilio.js';
/**
 * Builds the system prompt given to the model that plays the *user* side of
 * the simulated conversation.
 *
 * @param {string} [instructions] - Scenario the simulated user should follow.
 *   When falsy, the "Instruction:" section is omitted entirely.
 * @returns {string} The prompt text: an intro line, the optional instruction
 *   section, and a fixed list of rules (including the `###STOP###` convention
 *   used to end the conversation).
 */
function getUserSystemPrompt(instructions) {
  const instructionSection = instructions
    ? `\n\nInstruction: ${instructions}\n`
    : '';

  const rules = [
    "- Just generate one line at a time to simulate the user's message.",
    '- Do not give away all the instruction at once. Only provide the information that is necessary for the current step.',
    '- Do not hallucinate information that is not provided in the instruction. For example, if the agent asks for the order id but it is not mentioned in the instruction, do not make up an order id, just say you do not remember or have it.',
    "- If the instruction goal is satisfied, generate '###STOP###' as a standalone message without anything else to end the conversation.",
    '- Do not repeat the exact instruction in the conversation. Instead, use your own words to convey the same information.',
    // The original text ended with a stray `"""` (a leftover triple-quote
    // terminator) that leaked into the prompt; it is intentionally dropped.
    '- Try to make the conversation as natural as possible, and stick to the personalities in the instruction.',
  ];

  // The space after "agent." is kept so the prompt matches the previous
  // wording exactly, apart from the removed trailing quotes.
  return [
    `You are a user interacting with an agent. ${instructionSection}`,
    'Rules:',
    ...rules,
  ].join('\n');
}
/**
 * Provider that drives a multi-turn conversation between two other providers:
 * a "user" provider (an LLM role-playing the end user) and an "agent" provider
 * (the system under evaluation). `callApi` returns the resulting transcript.
 */
export class AgentProvider {
  identifier;
  maxTurns;
  userProviderOptions;
  agentProviderOptions;
  userProvider;
  agentProvider;
  logger;

  /**
   * @param {object} [options]
   * @param {string} [options.id] - Preferred identifier returned by `id()`.
   * @param {string} [options.label] - Fallback identifier when `id` is nullish.
   * @param {object} [options.config] - May contain `userProvider`,
   *   `agentProvider` (a provider id string or options object with an `id`)
   *   and `maxTurns` (defaults to 10).
   */
  constructor({ id, label, config } = {}) {
    // `config` may be absent when the provider is declared without settings;
    // fall back to an empty object instead of failing on destructuring.
    const settings = config ?? {};
    this.identifier = id ?? label ?? 'agent-provider';
    this.userProviderOptions = settings.userProvider;
    this.agentProviderOptions = settings.agentProvider;
    this.maxTurns = settings.maxTurns ?? 10;
    this.logger = pino({
      level: process.env.LOG_LEVEL || 'info',
    }).child({
      module: this.constructor.name,
    });
  }

  /** The loaded agent provider, or `undefined` before `ensureProviders()` ran. */
  get agentProviderInstance() {
    return this.agentProvider;
  }

  /**
   * Resolves provider options into a provider instance.
   *
   * @param {string|object|null|undefined} options - Provider id, an options
   *   object carrying `id`, or nullish to get the default
   *   `openai:chat:gpt-4o-mini` provider.
   * @returns {Promise<object>} The loaded provider.
   * @throws {Error} When options are given but carry no `id`.
   */
  async loadProvider(options) {
    // `null` (e.g. an empty config key) is treated like `undefined`.
    if (options == null) {
      return providers.loadApiProvider('openai:chat:gpt-4o-mini');
    }
    const resolved = typeof options === 'string' ? { id: options } : options;
    if (!resolved.id) {
      throw new Error('Provider ID is required');
    }
    // need to do this because otherwise we might end up in an infinite loop for TwilioAgentProvider
    if (resolved.id === 'package:@twilio-alpha/assistants-eval:TwilioProvider') {
      return new TwilioProvider(resolved);
    }
    return providers.loadApiProvider(resolved.id, {
      options: resolved,
    });
  }

  /** Loads the user and agent providers if they have not been set yet. */
  async ensureProviders() {
    if (!this.userProvider) {
      this.userProvider = await this.loadProvider(this.userProviderOptions);
    }
    if (!this.agentProvider) {
      this.agentProvider = await this.loadProvider(this.agentProviderOptions);
    }
  }

  /** @returns {string} The identifier chosen in the constructor. */
  id() {
    return this.identifier;
  }

  /**
   * Asks the user provider for the next user message.
   *
   * @param {Array<{role: string, content: *}>} messages - Conversation so far.
   * @param {object} context - Context forwarded to the provider's `callApi`.
   * @param {string} systemPrompt - System prompt for the simulated user.
   * @returns {Promise<{messages: Array, metadata: *, error: *}>} The
   *   conversation with the new `user` message appended, plus the provider's
   *   `metadata` and `error` fields as returned.
   * @throws {Error} When the user provider has not been loaded.
   */
  async sendMessageToUser(messages, context, systemPrompt) {
    if (!this.userProvider) {
      throw new Error('User provider not loaded');
    }
    // Since we are sending this to another AI that sees itself as the `assistant` we need to flip the roles of the messages
    // so any user message becomes an assistant message and vice versa
    const flippedMessages = messages.map(({ role, content }) => ({
      role: role === 'user' ? 'assistant' : 'user',
      content,
    }));
    const payload = JSON.stringify([
      { role: 'system', content: systemPrompt },
      ...flippedMessages,
    ]);
    const response = await this.userProvider.callApi(payload, context);
    this.logger.debug('User: %s', response.output);
    return {
      messages: [...messages, { role: 'user', content: response.output }],
      metadata: response.metadata,
      error: response.error,
    };
  }

  /**
   * Sends the conversation to the agent provider and appends its reply.
   *
   * @param {Array<{role: string, content: *}>} messages - Conversation so far.
   * @param {object} context - Context forwarded to the provider's `callApi`.
   * @param {string} [systemPrompt] - When truthy, prepended as a `system`
   *   message to what the agent receives (not stored in the returned messages).
   * @returns {Promise<{messages: Array, metadata: *, error: *}>} The
   *   conversation with the new `assistant` message appended, plus the
   *   provider's `metadata` and `error` fields as returned.
   * @throws {Error} When the agent provider has not been loaded.
   */
  async sendMessageToAgent(messages, context, systemPrompt) {
    if (!this.agentProvider) {
      throw new Error('Agent provider not loaded');
    }
    const fullMessageList = systemPrompt
      ? [{ role: 'system', content: systemPrompt }, ...messages]
      : [...messages];
    const response = await this.agentProvider.callApi(JSON.stringify(fullMessageList), context);
    this.logger.debug('Assistant: %s', response.output);
    return {
      messages: [...messages, { role: 'assistant', content: response.output }],
      metadata: response.metadata,
      error: response.error,
    };
  }

  /**
   * Runs the simulated conversation and returns its transcript.
   *
   * Each turn asks the user provider for a message, then the agent provider
   * for a reply. The loop ends after `maxTurns` completed turns, when either
   * side emits a string containing `###STOP###`, or when a provider response
   * carries an `error`.
   *
   * @param {string} prompt - Instructions for the simulated user.
   * @param {object} [context] - `context.vars.maxTurns` (number or numeric
   *   string) overrides the configured turn limit; a string
   *   `context.vars.agentPrompt` becomes the agent's system prompt. Neither is
   *   forwarded to the underlying providers; all other vars are.
   * @param {object} [_callApiOptions] - Unused.
   * @returns {Promise<{output: string, raw: string, metadata: object, error?: *}>}
   *   `output` is the human-readable transcript, `raw` the JSON-encoded
   *   message list, `metadata` the merged metadata of all provider responses.
   *   `error` is set only when a provider reported one; the failed turn's
   *   message is left out of the transcript.
   * @throws {Error} When providers could not be loaded.
   */
  async callApi(prompt, context, _callApiOptions) {
    await this.ensureProviders();
    if (!this.userProvider || !this.agentProvider) {
      throw new Error('Providers not loaded');
    }
    const STOP_TOKEN = '###STOP###';
    // Provider output is not guaranteed to be a string (error responses have
    // none, some providers return structured output), so guard before
    // searching for the stop token.
    const hasStopToken = (content) => typeof content === 'string' && content.includes(STOP_TOKEN);

    const userSystemPrompt = getUserSystemPrompt(prompt);
    const { maxTurns: varMaxTurns, agentPrompt, ...redactedVars } = context?.vars ?? {};
    const agentSystemPrompt = typeof agentPrompt === 'string' ? agentPrompt : undefined;

    // eslint-disable-next-line
    let maxTurns = this.maxTurns;
    if (varMaxTurns && typeof varMaxTurns !== 'object') {
      const requested = typeof varMaxTurns === 'number'
        ? varMaxTurns
        : Number.parseInt(varMaxTurns, 10);
      // Unparseable overrides fall back to the configured limit.
      if (!Number.isNaN(requested)) {
        maxTurns = requested;
      }
    }

    // Each side gets its own context object so a provider mutating its
    // context cannot affect the other one.
    const buildContext = () => ({
      vars: { ...redactedVars },
      prompt: { label: '{{prompt}}', raw: '{{prompt}}' },
    });
    const userContext = buildContext();
    const agentContext = buildContext();

    let messages = [];
    let metadata = {};
    let error;

    for (let turn = 0; turn < maxTurns; turn += 1) {
      // eslint-disable-next-line no-await-in-loop
      const userTurn = await this.sendMessageToUser(messages, userContext, userSystemPrompt);
      metadata = { ...metadata, ...userTurn.metadata };
      if (userTurn.error) {
        error = userTurn.error;
        break;
      }
      messages = userTurn.messages;
      if (hasStopToken(messages[messages.length - 1].content)) {
        break;
      }

      // eslint-disable-next-line no-await-in-loop
      const agentTurn = await this.sendMessageToAgent(messages, agentContext, agentSystemPrompt);
      metadata = { ...metadata, ...agentTurn.metadata };
      if (agentTurn.error) {
        error = agentTurn.error;
        break;
      }
      messages = agentTurn.messages;
      if (hasStopToken(messages[messages.length - 1].content)) {
        break;
      }
    }

    const result = {
      output: messages
        .map(({ role, content }) => `${role === 'assistant' ? 'Assistant' : 'User'}: ${content}`)
        .join('\n---\n'),
      raw: JSON.stringify(messages),
      metadata,
    };
    if (error) {
      result.error = error;
    }
    return result;
  }

  toString() {
    return 'AgentProvider';
  }
}
export default AgentProvider;
//# sourceMappingURL=agent.js.map