@future-agi/ai-evaluation
Version:
We help GenAI teams maintain high accuracy for their models in production.
43 lines • 2.36 kB
JavaScript
/**
 * Compiler-emitted helper that runs a generator function as if it were an
 * async function: every yielded value is awaited and the settled result is
 * fed back into the generator.
 *
 * An already-installed `this.__awaiter` is reused when present.
 *
 * @param {*} thisArg - `this` binding used when invoking the generator function.
 * @param {ArrayLike<*>} [_arguments] - Arguments forwarded to the generator function (defaults to none).
 * @param {Function} [P] - Promise constructor to use; falls back to the global `Promise`.
 * @param {Function} generator - Generator function to drive.
 * @returns {Promise<*>} Resolves with the generator's return value; rejects
 *   with any error that escapes the generator.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Pass instances of P through untouched; wrap anything else in a resolved P.
    function toPromise(candidate) {
        if (candidate instanceof P) {
            return candidate;
        }
        return new P(function (settle) {
            settle(candidate);
        });
    }
    return new (P || (P = Promise))(function (resolve, reject) {
        // Either finish with the generator's return value or wait on the yielded one.
        function advance(iteration) {
            if (iteration.done) {
                resolve(iteration.value);
            }
            else {
                toPromise(iteration.value).then(onFulfilled, onRejected);
            }
        }
        // Resume the generator with the awaited value.
        function onFulfilled(settledValue) {
            try {
                advance(generator.next(settledValue));
            }
            catch (err) {
                reject(err);
            }
        }
        // Re-raise the rejection inside the generator so its try/catch can see it.
        function onRejected(reason) {
            try {
                advance(generator["throw"](reason));
            }
            catch (err) {
                reject(err);
            }
        }
        // Instantiate the generator (rebinding `generator` to the iterator) and take the first step.
        generator = generator.apply(thisArg, _arguments || []);
        advance(generator.next());
    });
};
import { Evaluator } from '../evaluator';
// These tests hit the network, so raise Jest's timeout to 30 seconds.
jest.setTimeout(30000);
// End-to-end runs need both the API key and the secret key in the environment.
const hasCredentials = Boolean(process.env.FI_API_KEY) && Boolean(process.env.FI_SECRET_KEY);
// Without credentials, register the suite as skipped so runs without secrets
// (e.g. CI) finish quickly instead of failing.
const describeMaybe = !hasCredentials ? describe.skip : describe;
// End-to-end suite that exercises the Evaluator client against the real
// service. It is registered through `describeMaybe`, so it is skipped when
// credentials are not available.
describeMaybe('Evaluator – end-to-end (real network)', () => {
    // Created in `beforeAll` rather than directly in the describe callback:
    // Jest still executes the callback of a skipped describe while collecting
    // tests, so constructing the client here would run with undefined
    // credentials, and any error thrown by the constructor would fail the
    // file instead of skipping it. Hooks do not run when every test in the
    // suite is skipped.
    let evaluator;
    beforeAll(() => {
        evaluator = new Evaluator({
            fiApiKey: process.env.FI_API_KEY,
            fiSecretKey: process.env.FI_SECRET_KEY,
            fiBaseUrl: process.env.FI_BASE_URL, // optional – falls back to prod
            // Kept below the 30 s Jest timeout set at the top of the file.
            timeout: 25000,
        });
    });
    it('should evaluate "factual_accuracy" successfully', () => __awaiter(void 0, void 0, void 0, function* () {
        const inputs = {
            input: 'What is the capital of France?',
            output: 'Paris is the capital of France',
        };
        const result = yield evaluator.evaluate('factual_accuracy', inputs, {
            modelName: 'turing_flash',
        });
        // Only the response shape is checked: a non-empty `eval_results` array.
        expect(result).toBeDefined();
        expect(Array.isArray(result.eval_results)).toBe(true);
        expect(result.eval_results.length).toBeGreaterThan(0);
    }));
    it('should list available evaluations', () => __awaiter(void 0, void 0, void 0, function* () {
        const evaluations = yield evaluator.list_evaluations();
        // Expect a non-empty array of evaluations.
        expect(evaluations).toBeDefined();
        expect(Array.isArray(evaluations)).toBe(true);
        expect(evaluations.length).toBeGreaterThan(0);
    }));
});
//# sourceMappingURL=evaluator.e2e.test.js.map