UNPKG

@future-agi/ai-evaluation

Version:

We help GenAI teams maintain high accuracy for their models in production.

github.com/futureagi/ai-evaluation

futureagi/ai-evaluation

146 lines • 7.23 kB

JavaScript

/**
 * Jest test suite for the Evaluator client.
 *
 * The SDK's APIKeyAuth class is replaced with a stub whose `request` member is
 * a jest.fn(), so no network traffic happens; each test inspects the arguments
 * passed to that mock.
 */

// Compiler-emitted helper: drives a generator function as a Promise-returning
// coroutine (the down-levelled form of async/await).
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Wrap plain values in a promise so `.then` can always be chained.
    function adopt(value) {
        return value instanceof P ? value : new P(function (resolve) { resolve(value); });
    }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) {
            try {
                step(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        }
        function rejected(value) {
            try {
                step(generator["throw"](value));
            }
            catch (e) {
                reject(e);
            }
        }
        function step(result) {
            result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected);
        }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
import { Evaluator } from '../evaluator';
import { HttpMethod } from '@future-agi/sdk';
import { Templates } from '../templates';

// Mock the entire APIKeyAuth class from the SDK to spy on the 'request' method
jest.mock('@future-agi/sdk', () => {
    const originalModule = jest.requireActual('@future-agi/sdk');
    // Keep every real export and override only APIKeyAuth.
    return Object.assign({}, originalModule, {
        APIKeyAuth: class {
            constructor(options) {
                this.baseUrl = 'https://api.futureagi.com';
                this._defaultTimeout = 200;
                this.request = jest.fn();
            }
        }
    });
});

describe('Evaluator', () => {
    let evaluator;
    let mockRequest;

    beforeEach(() => {
        evaluator = new Evaluator({
            fiApiKey: process.env.FI_API_KEY,
            fiSecretKey: process.env.FI_SECRET_KEY
        });
        // Get the mock instance of the request method for each test
        // (presumably Evaluator inherits `request` from the mocked APIKeyAuth — confirm).
        mockRequest = evaluator.request;
        mockRequest.mockClear();
    });

    describe('constructor', () => {
        it('should initialize correctly', () => {
            expect(evaluator).toBeInstanceOf(Evaluator);
        });
    });

    describe('evaluate', () => {
        const mockInputs = {
            query: ["test query"],
            response: ["test response"]
        };
        const mockBatchResult = {
            eval_results: [{
                    data: {},
                    failure: false,
                    reason: "",
                    runtime: 100,
                    metrics: [{ id: 'metric1', value: 1 }]
                }]
        };

        it('should perform a successful evaluation', () => __awaiter(void 0, void 0, void 0, function* () {
            mockRequest.mockResolvedValue(mockBatchResult);
            const result = yield evaluator.evaluate('factual_accuracy', mockInputs, { modelName: 'test-model' });
            expect(mockRequest).toHaveBeenCalledTimes(1);
            const callArgs = mockRequest.mock.calls[0];
            // First argument: the request descriptor sent to the API.
            // NOTE(review): the pinned timeout is NaN — presumably an artefact of the
            // mocked base class; confirm this is the intended expectation.
            expect(callArgs[0]).toEqual({
                method: HttpMethod.POST,
                url: 'https://api.futureagi.com/sdk/api/v1/new-eval/',
                json: {
                    eval_name: 'factual_accuracy',
                    inputs: mockInputs,
                    trace_eval: false,
                    custom_eval_name: undefined,
                    model: 'test-model',
                    span_id: undefined
                },
                timeout: NaN
            });
            // Second argument: a function (looks like a response handler — confirm).
            expect(typeof callArgs[1]).toBe('function');
            expect(result).toEqual(mockBatchResult);
        }));

        it('should throw an error for invalid eval_templates', () => __awaiter(void 0, void 0, void 0, function* () {
            yield expect(evaluator.evaluate({}, mockInputs, { modelName: 'test-model' })).rejects.toThrow('Unsupported eval_templates argument.');
        }));

        it('should correctly transform inputs for the API payload when provided as a dict of strings', () => __awaiter(void 0, void 0, void 0, function* () {
            mockRequest.mockResolvedValue(mockBatchResult);
            const singleInput = { query: "q1", response: "r1" };
            yield evaluator.evaluate('factual_accuracy', singleInput, { modelName: 'test-model' });
            // Scalar string inputs are expected to reach the payload wrapped in arrays.
            expect(mockRequest).toHaveBeenCalledWith(expect.objectContaining({
                json: expect.objectContaining({
                    inputs: {
                        query: ["q1"],
                        response: ["r1"]
                    }
                })
            }), expect.any(Function));
        }));
    });

    describe('list_evaluations', () => {
        it('should return a list of evaluations', () => __awaiter(void 0, void 0, void 0, function* () {
            const mockEvalList = [{ name: 'factual_accuracy', id: '1' }, { name: 'toxicity', id: '2' }];
            mockRequest.mockResolvedValue(mockEvalList);
            const result = yield evaluator.list_evaluations();
            expect(mockRequest).toHaveBeenCalledWith(expect.objectContaining({
                method: HttpMethod.GET,
                url: expect.stringContaining('/get-evals'),
            }), expect.any(Function) // EvalInfoResponseHandler
            );
            expect(result).toEqual(mockEvalList);
        }));
    });

    describe('_get_eval_info caching', () => {
        it('should cache the results of _get_eval_info', () => __awaiter(void 0, void 0, void 0, function* () {
            const mockEvalList = [
                { name: 'factual_accuracy', eval_id: '1' },
                { name: 'toxicity', eval_id: '2' },
            ];
            mockRequest.mockResolvedValue(mockEvalList);
            // Access the private method for testing purposes
            const getEvalInfo = evaluator._get_eval_info.bind(evaluator);
            // First call
            const result1 = yield getEvalInfo('factual_accuracy');
            expect(result1).toEqual(mockEvalList[0]);
            expect(mockRequest).toHaveBeenCalledTimes(1);
            // Second call - should hit the cache
            const result2 = yield getEvalInfo('factual_accuracy');
            expect(result2).toEqual(mockEvalList[0]);
            expect(mockRequest).toHaveBeenCalledTimes(1); // Should not be called again
            // Third call with a different eval_name - should make a new request
            const result3 = yield getEvalInfo('toxicity');
            expect(result3).toEqual(mockEvalList[1]);
            expect(mockRequest).toHaveBeenCalledTimes(2);
        }));
    });

    describe('evaluate every available eval_name', () => {
        // Collect all eval_name strings defined in Templates
        const evalNames = Object.values(Templates).map(t => t.eval_name);
        const dummyBatchResult = {
            eval_results: [{ data: {}, failure: false, reason: "", runtime: 0, metrics: [] }]
        };

        it.each(evalNames)('should construct payload for %s', (evalName) => __awaiter(void 0, void 0, void 0, function* () {
            mockRequest.mockResolvedValue(dummyBatchResult);
            yield evaluator.evaluate(evalName, { input: 'example', response: 'example' }, { modelName: 'test-model' });
            expect(mockRequest).toHaveBeenCalledWith(expect.objectContaining({
                json: expect.objectContaining({ eval_name: evalName })
            }), expect.any(Function));
        }));
    });
});
//# sourceMappingURL=evaluator.test.js.map