UNPKG

@future-agi/ai-evaluation

Version:

We help GenAI teams maintain high accuracy for their models in production.

43 lines 2.36 kB
// Compiler-emitted helper that drives a generator function as if it were an
// async function: every yielded value is adopted into a promise, and its
// settlement is fed back into the generator via next()/throw().
// `var` plus the `this.__awaiter` guard are kept as emitted so an already
// defined helper on `this` is reused instead of being redeclared.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Wrap plain values in a P instance so `.then` can always be called.
    function adopt(value) {
        return value instanceof P ? value : new P(function (resolve) { resolve(value); });
    }
    return new (P || (P = Promise))(function (resolve, reject) {
        // Resume the generator with the fulfilled value.
        function fulfilled(value) {
            try { step(generator.next(value)); } catch (e) { reject(e); }
        }
        // Resume the generator by throwing the rejection reason into it.
        function rejected(value) {
            try { step(generator["throw"](value)); } catch (e) { reject(e); }
        }
        // Resolve the outer promise when the generator finishes; otherwise wait on the yielded value.
        function step(result) {
            result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected);
        }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
import { Evaluator } from '../evaluator';

// Increase default timeout since network calls may be slow
jest.setTimeout(30000);

// Helper to decide if we have credentials for running end-to-end
const hasCredentials = Boolean(process.env.FI_API_KEY && process.env.FI_SECRET_KEY);

// Use describe.skip if credentials are missing so test run passes quickly in CI without secrets
const describeMaybe = hasCredentials ? describe : describe.skip;

describeMaybe('Evaluator – end-to-end (real network)', () => {
    // The client is created in beforeAll rather than in the describe body:
    // Jest still executes the callback of a skipped describe block to collect
    // its tests, so constructing it here would run even when the credentials
    // are missing. Hooks of a fully skipped block are not run.
    // NOTE(review): whether the Evaluator constructor rejects undefined keys
    // is not visible from this file — confirm against ../evaluator.
    let evaluator;

    beforeAll(() => {
        evaluator = new Evaluator({
            fiApiKey: process.env.FI_API_KEY,
            fiSecretKey: process.env.FI_SECRET_KEY,
            fiBaseUrl: process.env.FI_BASE_URL, // optional – falls back to prod
            // Numerically below the 30000 Jest timeout set above
            // (presumably milliseconds as well — confirm against Evaluator).
            timeout: 25000,
        });
    });

    it('should evaluate "factual_accuracy" successfully', () => __awaiter(void 0, void 0, void 0, function* () {
        const inputs = {
            input: 'What is the capital of France?',
            output: 'Paris is the capital of France',
        };
        const result = yield evaluator.evaluate('factual_accuracy', inputs, {
            modelName: 'turing_flash',
        });
        // Only the response shape is checked: a non-empty eval_results array.
        expect(result).toBeDefined();
        expect(Array.isArray(result.eval_results)).toBe(true);
        expect(result.eval_results.length).toBeGreaterThan(0);
    }));

    it('should list available evaluations', () => __awaiter(void 0, void 0, void 0, function* () {
        const evaluations = yield evaluator.list_evaluations();
        // Only the response shape is checked: a non-empty array.
        expect(evaluations).toBeDefined();
        expect(Array.isArray(evaluations)).toBe(true);
        expect(evaluations.length).toBeGreaterThan(0);
    }));
});
//# sourceMappingURL=evaluator.e2e.test.js.map