@future-agi/ai-evaluation
Version:
We help GenAI teams maintain high accuracy for their models in production.
146 lines • 7.23 kB
JavaScript
// Downlevel async/await helper: drives a generator to completion, treating
// every `yield` as an await point. An already-defined `this.__awaiter` is
// reused when present.
//   thisArg    - `this` binding applied to the generator function
//   _arguments - argument list applied to the generator function (defaults to [])
//   P          - promise constructor to use (defaults to the global Promise)
//   generator  - generator function whose yielded values are awaited
// Returns a P promise that resolves with the generator's return value, or
// rejects with the first error that escapes the generator.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    if (!P) {
        P = Promise;
    }
    // Pass P instances through untouched; wrap anything else in a resolved P.
    const adopt = (value) => {
        if (value instanceof P) {
            return value;
        }
        return new P((settle) => { settle(value); });
    };
    return new P((resolve, reject) => {
        // Resume the generator via `next` or `throw`; any error it lets escape
        // rejects the outer promise.
        const resume = (method, value) => {
            try {
                step(generator[method](value));
            }
            catch (err) {
                reject(err);
            }
        };
        const fulfilled = (value) => { resume("next", value); };
        const rejected = (reason) => { resume("throw", reason); };
        // Finish when the generator is done; otherwise wait on the yielded value.
        const step = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            adopt(result.value).then(fulfilled, rejected);
        };
        // Instantiate the generator, then kick off the first step.
        generator = generator.apply(thisArg, _arguments || []);
        step(generator.next());
    });
};
import { Evaluator } from '../evaluator';
import { HttpMethod } from '@future-agi/sdk';
import { Templates } from '../templates';
// Swap the SDK's APIKeyAuth for a minimal stand-in whose `request` is a
// jest.fn(), so tests can inspect the calls made through it. Every other
// export is passed through from the actual '@future-agi/sdk' module.
jest.mock('@future-agi/sdk', () => {
    const actualSdk = jest.requireActual('@future-agi/sdk');
    // Constructor options are accepted but ignored by the stand-in.
    const APIKeyAuth = class {
        constructor(options) {
            Object.assign(this, {
                baseUrl: 'https://api.futureagi.com',
                _defaultTimeout: 200,
                request: jest.fn(),
            });
        }
    };
    return Object.assign({}, actualSdk, { APIKeyAuth });
});
describe('Evaluator', () => {
// Fixtures shared by every test below; both are reassigned before each test.
let evaluator;
let mockRequest;
beforeEach(() => {
    // NOTE(review): credentials are read from the environment and may be
    // undefined; the mocked APIKeyAuth constructor ignores its options, but
    // confirm Evaluator itself does not depend on them.
    const credentials = {
        fiApiKey: process.env.FI_API_KEY,
        fiSecretKey: process.env.FI_SECRET_KEY,
    };
    evaluator = new Evaluator(credentials);
    // Presumably `request` is the jest.fn() installed by the mocked
    // APIKeyAuth constructor; reset its recorded calls for a clean slate.
    mockRequest = evaluator.request;
    mockRequest.mockClear();
});
// Smoke test: the instance built in beforeEach is an Evaluator.
describe('constructor', () => {
    it('should initialize correctly', () => {
        const instanceExpectation = expect(evaluator);
        instanceExpectation.toBeInstanceOf(Evaluator);
    });
});
// Covers evaluate(): the request it issues, rejection of an unsupported
// template argument, and wrapping of plain-string inputs into arrays.
describe('evaluate', () => {
    // Inputs that are already in list form.
    const listInputs = { query: ["test query"], response: ["test response"] };
    // Canned value that the mocked request resolves with.
    const cannedBatchResult = {
        eval_results: [
            {
                data: {},
                failure: false,
                reason: "",
                runtime: 100,
                metrics: [{ id: 'metric1', value: 1 }]
            }
        ]
    };
    it('should perform a successful evaluation', () => __awaiter(void 0, void 0, void 0, function* () {
        mockRequest.mockResolvedValue(cannedBatchResult);
        const evaluation = yield evaluator.evaluate('factual_accuracy', listInputs, { modelName: 'test-model' });
        expect(mockRequest).toHaveBeenCalledTimes(1);
        // First positional argument is the request config, second the response handler.
        const [requestConfig, responseHandler] = mockRequest.mock.calls[0];
        const expectedPayload = {
            eval_name: 'factual_accuracy',
            inputs: listInputs,
            trace_eval: false,
            custom_eval_name: undefined,
            model: 'test-model',
            span_id: undefined
        };
        // NOTE(review): the expected timeout is NaN (toEqual treats NaN as
        // equal to NaN). Presumably the mocked APIKeyAuth does not expose the
        // timeout property Evaluator reads - confirm this is intended rather
        // than pinning a bug.
        expect(requestConfig).toEqual({
            method: HttpMethod.POST,
            url: 'https://api.futureagi.com/sdk/api/v1/new-eval/',
            json: expectedPayload,
            timeout: NaN
        });
        expect(typeof responseHandler).toBe('function');
        expect(evaluation).toEqual(cannedBatchResult);
    }));
    it('should throw an error for invalid eval_templates', () => __awaiter(void 0, void 0, void 0, function* () {
        // An empty object is passed as the template argument.
        const pendingEvaluation = evaluator.evaluate({}, listInputs, { modelName: 'test-model' });
        yield expect(pendingEvaluation).rejects.toThrow('Unsupported eval_templates argument.');
    }));
    it('should correctly transform inputs for the API payload when provided as a dict of strings', () => __awaiter(void 0, void 0, void 0, function* () {
        mockRequest.mockResolvedValue(cannedBatchResult);
        const scalarInputs = { query: "q1", response: "r1" };
        yield evaluator.evaluate('factual_accuracy', scalarInputs, { modelName: 'test-model' });
        // Each string value is expected to arrive wrapped in a one-element array.
        const wrappedInputs = {
            query: ["q1"],
            response: ["r1"]
        };
        const requestMatcher = expect.objectContaining({
            json: expect.objectContaining({ inputs: wrappedInputs })
        });
        expect(mockRequest).toHaveBeenCalledWith(requestMatcher, expect.any(Function));
    }));
});
// Covers list_evaluations(): a GET to a '/get-evals' URL whose resolved value
// is returned to the caller.
describe('list_evaluations', () => {
    it('should return a list of evaluations', () => __awaiter(void 0, void 0, void 0, function* () {
        const availableEvals = [
            { name: 'factual_accuracy', id: '1' },
            { name: 'toxicity', id: '2' }
        ];
        mockRequest.mockResolvedValue(availableEvals);
        const listed = yield evaluator.list_evaluations();
        const requestMatcher = expect.objectContaining({
            method: HttpMethod.GET,
            url: expect.stringContaining('/get-evals'),
        });
        // The second argument is a response handler (presumably
        // EvalInfoResponseHandler - verify against Evaluator).
        const handlerMatcher = expect.any(Function);
        expect(mockRequest).toHaveBeenCalledWith(requestMatcher, handlerMatcher);
        expect(listed).toEqual(availableEvals);
    }));
});
// Covers _get_eval_info(): a repeated lookup of the same eval name must not
// trigger another request, while a different name must.
describe('_get_eval_info caching', () => {
    it('should cache the results of _get_eval_info', () => __awaiter(void 0, void 0, void 0, function* () {
        const knownEvals = [
            { name: 'factual_accuracy', eval_id: '1' },
            { name: 'toxicity', eval_id: '2' },
        ];
        const [factualInfo, toxicityInfo] = knownEvals;
        mockRequest.mockResolvedValue(knownEvals);
        // Reach into the underscore-prefixed method directly; bound so `this` is the evaluator.
        const lookupEvalInfo = evaluator._get_eval_info.bind(evaluator);
        // 1st lookup: one request is issued.
        const firstLookup = yield lookupEvalInfo('factual_accuracy');
        expect(firstLookup).toEqual(factualInfo);
        expect(mockRequest).toHaveBeenCalledTimes(1);
        // 2nd lookup, same name: the request count must stay at one.
        const repeatedLookup = yield lookupEvalInfo('factual_accuracy');
        expect(repeatedLookup).toEqual(factualInfo);
        expect(mockRequest).toHaveBeenCalledTimes(1);
        // 3rd lookup, different name: a second request is expected.
        const otherLookup = yield lookupEvalInfo('toxicity');
        expect(otherLookup).toEqual(toxicityInfo);
        expect(mockRequest).toHaveBeenCalledTimes(2);
    }));
});
// Parameterized check: for every eval_name found on the Templates values,
// evaluate() must send a request whose JSON payload carries that eval_name.
describe('evaluate every available eval_name', () => {
    // One eval_name per entry in Templates.
    const templateEvalNames = Object.values(Templates).map(({ eval_name }) => eval_name);
    // Minimal canned value for the mocked request to resolve with.
    const emptyBatchResult = {
        eval_results: [
            {
                data: {},
                failure: false,
                reason: "",
                runtime: 0,
                metrics: []
            }
        ]
    };
    it.each(templateEvalNames)('should construct payload for %s', (evalName) => __awaiter(void 0, void 0, void 0, function* () {
        mockRequest.mockResolvedValue(emptyBatchResult);
        const placeholderInputs = { input: 'example', response: 'example' };
        yield evaluator.evaluate(evalName, placeholderInputs, { modelName: 'test-model' });
        const payloadMatcher = expect.objectContaining({ eval_name: evalName });
        const requestMatcher = expect.objectContaining({ json: payloadMatcher });
        expect(mockRequest).toHaveBeenCalledWith(requestMatcher, expect.any(Function));
    }));
});
});
//# sourceMappingURL=evaluator.test.js.map