UNPKG

@twilio-alpha/assistants-eval

Version:

promptfoo extension for writing AI evaluations for Twilio AI Assistants

276 lines (230 loc) 7.79 kB
import pino from 'pino';
import {
  ApiProvider,
  CallApiContextParams,
  CallApiOptionsParams,
  ProviderOptions,
  ProviderResponse,
  providers,
} from 'promptfoo';
import TwilioProvider, { TwilioProviderOptions } from './twilio';

/** Marker that either side of the conversation emits to end the simulation. */
const STOP_TOKEN = '###STOP###';

/** Provider id used when no provider options are supplied for a role. */
const DEFAULT_PROVIDER_ID = 'openai:chat:gpt-4o-mini';

/** Provider id that is instantiated directly instead of going through promptfoo's loader. */
const TWILIO_PROVIDER_ID = 'package:@twilio-alpha/assistants-eval:TwilioProvider';

/** Behavioural rules appended to the simulated user's system prompt. */
const USER_SIMULATION_RULES: readonly string[] = [
  `- Just generate one line at a time to simulate the user's message.`,
  `- Do not give away all the instruction at once. Only provide the information that is necessary for the current step.`,
  `- Do not hallucinate information that is not provided in the instruction. For example, if the agent asks for the order id but it is not mentioned in the instruction, do not make up an order id, just say you do not remember or have it.`,
  `- If the instruction goal is satisfied, generate '${STOP_TOKEN}' as a standalone message without anything else to end the conversation.`,
  `- Do not repeat the exact instruction in the conversation. Instead, use your own words to convey the same information.`,
  `- Try to make the conversation as natural as possible, and stick to the personalities in the instruction.`,
];

/**
 * Builds the system prompt for the model that plays the end user.
 *
 * NOTE(review): the line breaks of the prompt are reconstructed (one line per
 * rule); confirm against the published package if exact whitespace matters.
 *
 * @param instructions Scenario the simulated user should act out. When empty,
 *   the "Instruction" section is omitted.
 * @returns The complete system prompt text.
 */
function getUserSystemPrompt(instructions: string) {
  const instructionBlock = instructions
    ? `\n\nInstruction: ${instructions}\n`
    : '';

  return [
    'You are a user interacting with an agent.',
    instructionBlock,
    'Rules:',
    ...USER_SIMULATION_RULES,
  ]
    .join('\n')
    .trim();
}

/**
 * Normalizes a provider response into the text stored in the transcript.
 *
 * @param response Response returned by a provider's `callApi`.
 * @param speaker Human-readable role name used in the error message.
 * @returns The output as a string; non-string outputs are JSON-encoded.
 * @throws Error when the response carries no output at all.
 */
function toMessageContent(response: ProviderResponse, speaker: string): string {
  const { output, error } = response;
  if (output === undefined || output === null) {
    // Fail with context here rather than with a TypeError on `.includes` later.
    throw new Error(
      `${speaker} provider returned no output${error ? `: ${error}` : ''}`,
    );
  }
  return typeof output === 'string' ? output : JSON.stringify(output);
}

/** A single chat message exchanged between the simulated user and the agent. */
export type Message = {
  role: 'user' | 'assistant' | 'system';
  content: string;
};

/** Options accepted by {@link AgentProvider}. */
export type AgentProviderOptions = ProviderOptions & {
  config: {
    /** Provider that plays the user; falls back to the default provider id. */
    userProvider?: ProviderOptions;
    /** Provider under test; falls back to the default provider id. */
    agentProvider?: ProviderOptions;
    /** Maximum number of agent replies per conversation (default 10). */
    maxTurns?: number;
  };
};

/**
 * promptfoo provider that runs a multi-turn conversation between a simulated
 * user (driven by one provider) and an agent (driven by another) and returns
 * the resulting transcript.
 */
export class AgentProvider implements ApiProvider {
  private readonly identifier: string;

  private readonly maxTurns: number;

  private readonly userProviderOptions?: ProviderOptions;

  private readonly agentProviderOptions?: ProviderOptions;

  protected userProvider?: ApiProvider;

  protected agentProvider?: ApiProvider;

  private readonly logger: pino.Logger;

  constructor({ id, label, config }: AgentProviderOptions) {
    this.identifier = id ?? label ?? 'agent-provider';
    const { agentProvider, userProvider } = config;
    this.userProviderOptions = userProvider;
    this.agentProviderOptions = agentProvider;
    this.maxTurns = config.maxTurns ?? 10;
    this.logger = pino({
      level: process.env.LOG_LEVEL || 'info',
    }).child({
      module: this.constructor.name,
    });
  }

  /** The loaded agent provider, or `undefined` before the first `callApi`. */
  get agentProviderInstance() {
    return this.agentProvider;
  }

  /**
   * Resolves provider options into a provider instance.
   *
   * @param options Provider options; `undefined` selects the default provider.
   * @throws Error when options are given without an `id`.
   */
  private async loadProvider(options?: ProviderOptions): Promise<ApiProvider> {
    // Tolerate a bare id string at runtime even though the static type is an object.
    if (typeof options === 'string') {
      options = { id: options };
    }

    if (typeof options === 'undefined') {
      return providers.loadApiProvider(DEFAULT_PROVIDER_ID);
    }

    if (!options.id) {
      throw new Error('Provider ID is required');
    }

    // need to do this because otherwise we might end up in an infinite loop for TwilioAgentProvider
    if (options.id === TWILIO_PROVIDER_ID) {
      return new TwilioProvider(options as TwilioProviderOptions);
    }

    return providers.loadApiProvider(options.id, {
      options,
    });
  }

  /** Lazily loads the user and agent providers if they are not set yet. */
  protected async ensureProviders(): Promise<void> {
    if (!this.userProvider) {
      this.userProvider = await this.loadProvider(this.userProviderOptions);
    }
    if (!this.agentProvider) {
      this.agentProvider = await this.loadProvider(this.agentProviderOptions);
    }
  }

  /** Identifier of this provider: `id`, else `label`, else `'agent-provider'`. */
  id() {
    return this.identifier;
  }

  /**
   * Asks the user provider for the next user message.
   *
   * @param messages Conversation so far, from the agent's point of view.
   * @param context Context forwarded to the user provider.
   * @param systemPrompt System prompt describing the simulated user.
   * @returns The conversation with the new user message appended, plus the
   *   metadata of the provider response.
   * @throws Error when the user provider is not loaded or returns no output.
   */
  private async sendMessageToUser(
    messages: Message[],
    context: CallApiContextParams,
    systemPrompt: string,
  ): Promise<{ messages: Message[]; metadata: ProviderResponse['metadata'] }> {
    if (!this.userProvider) {
      throw new Error('User provider not loaded');
    }

    // Since we are sending this to another AI that sees itself as the `assistant` we need to flip the roles of the messages
    // so any user message becomes an assistant message and vice versa
    const flippedMessages = messages.map((message) => ({
      role: message.role === 'user' ? 'assistant' : 'user',
      content: message.content,
    }));

    const response = await this.userProvider.callApi(
      JSON.stringify([
        { role: 'system', content: systemPrompt },
        ...flippedMessages,
      ]),
      context,
    );
    const content = toMessageContent(response, 'User');
    this.logger.debug('User: %s', content);

    return {
      messages: [...messages, { role: 'user', content }],
      metadata: response.metadata,
    };
  }

  /**
   * Sends the conversation to the agent provider and collects its reply.
   *
   * @param messages Conversation so far.
   * @param context Context forwarded to the agent provider.
   * @param systemPrompt Optional system message placed before the conversation.
   * @returns The conversation with the agent reply appended, plus the metadata
   *   of the provider response.
   * @throws Error when the agent provider is not loaded or returns no output.
   */
  private async sendMessageToAgent(
    messages: Message[],
    context: CallApiContextParams,
    systemPrompt?: string,
  ): Promise<{ messages: Message[]; metadata: ProviderResponse['metadata'] }> {
    if (!this.agentProvider) {
      throw new Error('Agent provider not loaded');
    }

    const fullMessageList: Message[] = systemPrompt
      ? [{ role: 'system', content: systemPrompt }, ...messages]
      : [...messages];

    const response = await this.agentProvider.callApi(
      JSON.stringify(fullMessageList),
      context,
    );
    const content = toMessageContent(response, 'Agent');
    this.logger.debug('Assistant: %s', content);

    return {
      messages: [...messages, { role: 'assistant', content }],
      metadata: response.metadata,
    };
  }

  /**
   * Runs the simulated conversation.
   *
   * The loop alternates user and agent messages until either side emits the
   * stop token or the agent has replied `maxTurns` times.
   *
   * @param prompt Instructions the simulated user should act out.
   * @param context promptfoo call context. `vars.maxTurns` overrides the
   *   configured turn limit and a string `vars.agentPrompt` is sent to the
   *   agent as a system message; both are stripped from the vars forwarded to
   *   the underlying providers.
   * @param _callApiOptions Unused.
   * @returns `output` is the transcript formatted as `User:`/`Assistant:` lines
   *   separated by `---`; `raw` is the JSON-encoded message list; `metadata`
   *   merges the metadata of every provider response.
   * @throws Error when providers cannot be loaded or return no output.
   */
  async callApi(
    prompt: string,
    context?: CallApiContextParams,
    _callApiOptions?: CallApiOptionsParams,
  ): Promise<ProviderResponse> {
    await this.ensureProviders();
    if (!this.userProvider || !this.agentProvider) {
      throw new Error('Providers not loaded');
    }

    const userSystemPrompt = getUserSystemPrompt(prompt);
    const {
      maxTurns: varMaxTurns,
      agentPrompt,
      ...redactedVars
    } = context?.vars ?? {};

    let maxTurns = this.maxTurns;
    if (varMaxTurns && typeof varMaxTurns !== 'object') {
      maxTurns =
        typeof varMaxTurns === 'number'
          ? varMaxTurns
          : parseInt(varMaxTurns, 10);
      if (Number.isNaN(maxTurns)) {
        // Unparseable override: fall back to the configured limit.
        maxTurns = this.maxTurns;
      }
    }

    // Each provider gets its own context object so neither can affect the other.
    const buildContext = (): CallApiContextParams => ({
      vars: { ...redactedVars },
      prompt: { label: '{{prompt}}', raw: '{{prompt}}' },
    });
    const userContext = buildContext();
    const agentContext = buildContext();
    const agentSystemPrompt =
      typeof agentPrompt === 'string' ? agentPrompt : undefined;

    const endsWithStop = (conversation: Message[]): boolean =>
      conversation[conversation.length - 1]?.content.includes(STOP_TOKEN) ??
      false;

    let messages: Message[] = [];
    let metadata = {};
    let iterations = 0;

    while (iterations < maxTurns) {
      // eslint-disable-next-line no-await-in-loop
      const userTurn = await this.sendMessageToUser(
        messages,
        userContext,
        userSystemPrompt,
      );
      messages = userTurn.messages;
      metadata = { ...metadata, ...userTurn.metadata };
      if (endsWithStop(messages)) {
        break;
      }

      // eslint-disable-next-line no-await-in-loop
      const agentTurn = await this.sendMessageToAgent(
        messages,
        agentContext,
        agentSystemPrompt,
      );
      messages = agentTurn.messages;
      metadata = { ...metadata, ...agentTurn.metadata };
      iterations += 1;
      if (endsWithStop(messages)) {
        break;
      }
    }

    const transcript = messages
      .map(
        (message) =>
          `${message.role === 'assistant' ? 'Assistant' : 'User'}: ${message.content}`,
      )
      .join('\n---\n');

    return {
      output: transcript,
      raw: JSON.stringify(messages),
      metadata,
    };
  }

  toString() {
    return 'AgentProvider';
  }
}

export default AgentProvider;