/*
 * @twilio-alpha/assistants-eval
 * Version:
 * promptfoo extension for writing AI evaluations for Twilio AI Assistants
 * 219 lines (201 loc) • 5.41 kB
 * text/typescript
 */
import { AssertionValueFunctionContext } from 'promptfoo';
import { describe, expect, it, vi } from 'vitest';
import { TwilioProvider } from '../providers';
import { TwilioProviderOptions } from '../providers/twilio';
import {
findAllToolCalls,
findToolCallsForResponse,
Message,
usedTool,
} from './usedTool';
/**
 * Tests for the `usedTool` assertion.
 *
 * Covers the two rejection paths asserted below (a provider that is not a
 * TwilioProvider, and a provider response without session metadata) and one
 * passing path where the stubbed message history contains the expected tool.
 */
describe('usedTool', () => {
  // Empty provider options, used only to construct a TwilioProvider instance.
  const options: TwilioProviderOptions = {
    config: {},
    env: {},
  };

  it('should return false if context is not TwilioProviderContext', async () => {
    // `provider` is a plain object rather than a TwilioProvider instance.
    const context = {
      provider: {},
      providerResponse: {},
    } as unknown as AssertionValueFunctionContext;

    const result = await usedTool('', context);

    expect(result).toEqual({
      pass: false,
      score: 0,
      reason:
        'Assertion can only be used in with TwilioProvider or TwilioAgentProvider',
    });
  });

  it('should return false if providerResponse does not have sessionId', async () => {
    // Real provider, but the response carries no `metadata.sessionId`.
    const context = {
      provider: new TwilioProvider(options),
      providerResponse: {},
    } as unknown as AssertionValueFunctionContext;

    const result = await usedTool('', context);

    expect(result).toEqual({
      pass: false,
      score: 0,
      reason:
        'Assertion can only be used in with TwilioProvider or TwilioAgentProvider',
    });
  });

  it('should return tool calls from history', async () => {
    const context = {
      provider: new TwilioProvider(options),
      providerResponse: {
        metadata: {
          sessionId: 'sessionId',
        },
      },
      config: {
        expectedTools: [
          {
            name: 'toolName',
          },
        ],
      },
    } as unknown as AssertionValueFunctionContext;

    // Stub the global fetch with a response whose json() resolves to a single
    // page of messages containing one call to the expected tool. Only `json`
    // is provided, so the stub is force-cast to Response.
    const fetchSpy = vi.spyOn(global, 'fetch').mockResolvedValue({
      json: vi.fn().mockResolvedValue({
        messages: [
          {
            role: 'tool',
            content: {
              input: 'input',
              output: 'output',
              name: 'toolName',
            },
            account_sid: 'account_sid',
            assistant_id: 'assistant_id',
            date_created: 'date_created',
            date_updated: 'date_updated',
            id: 'id',
            identity: 'identity',
            meta: {},
          },
        ],
        meta: {
          first_page_url: null,
          next_page_url: null,
          previous_page_url: null,
          url: 'url',
          key: 'messages',
          page: 1,
          page_size: 1,
        },
      }),
    } as unknown as Response);

    try {
      const result = await usedTool('', context);

      expect(result).toMatchObject({
        pass: true,
        score: 1,
        reason: 'Tools used',
      });
    } finally {
      // Always put the real fetch back, even when an assertion throws, so the
      // stub cannot leak into tests that run afterwards.
      fetchSpy.mockRestore();
    }
  });
});
/**
 * Tests for `findAllToolCalls`: from a history holding one tool message and
 * one user message, only the tool message is expected in the result.
 */
describe('findAllToolCalls', () => {
  it('should return all tool calls from messages', () => {
    // String fields that are identical on every fixture message.
    const sharedFields = {
      account_sid: 'account_sid',
      assistant_id: 'assistant_id',
      date_created: 'date_created',
      date_updated: 'date_updated',
      id: 'id',
      identity: 'identity',
    };

    const toolCall: Message = {
      ...sharedFields,
      role: 'tool',
      content: { input: 'input', output: 'output', name: 'toolName' },
      meta: {},
    };

    const userMessage: Message = {
      ...sharedFields,
      role: 'user',
      content: { content: 'user message' },
      meta: {},
    };

    const toolCalls = findAllToolCalls([toolCall, userMessage]);

    expect(toolCalls).toEqual([toolCall]);
  });
});
/**
 * Tests for `findToolCallsForResponse`: the history is an assistant message,
 * two tool messages and a user message (in that order), and looking up the
 * assistant's text is expected to yield exactly the two tool messages.
 */
describe('findToolCallsForResponse', () => {
  it('should return tool calls for a specific response', () => {
    // String fields that are identical on every fixture message.
    const sharedFields = {
      account_sid: 'account_sid',
      assistant_id: 'assistant_id',
      date_created: 'date_created',
      date_updated: 'date_updated',
      id: 'id',
      identity: 'identity',
    };

    const assistantReply: Message = {
      ...sharedFields,
      role: 'assistant',
      content: { content: 'assistant response' },
      meta: {
        tokens: {
          completionTokens: 10,
          promptTokens: 10,
          totalTokens: 20,
        },
      },
    };

    const firstToolCall: Message = {
      ...sharedFields,
      role: 'tool',
      content: { input: 'input', output: 'output', name: 'toolName' },
      meta: {},
    };

    const secondToolCall: Message = {
      ...sharedFields,
      role: 'tool',
      content: { input: 'input2', output: 'output2', name: 'toolNameB' },
      meta: {},
    };

    const userMessage: Message = {
      ...sharedFields,
      role: 'user',
      content: { content: 'user message' },
      meta: {},
    };

    const history: Message[] = [
      assistantReply,
      firstToolCall,
      secondToolCall,
      userMessage,
    ];

    const matched = findToolCallsForResponse(history, 'assistant response');

    expect(matched).toEqual([firstToolCall, secondToolCall]);
  });
});