@future-agi/ai-evaluation
We help GenAI teams maintain high accuracy for their models in production.
JavaScript
import { APIKeyAuth, ResponseHandler, HttpMethod, Routes, InvalidAuthError } from '@future-agi/sdk';
/**
* Handles responses for evaluation requests
*/
export class EvalResponseHandler extends ResponseHandler {
    static _parseSuccess(response) {
        const data = response.data || {};
        const evalResults = [];
        if (Array.isArray(data.result)) {
            for (const result of data.result) {
                if (result && Array.isArray(result.evaluations)) {
                    for (const evaluation of result.evaluations) {
                        const newMetadata = {};
                        if (evaluation?.metadata) {
                            let metadata = evaluation.metadata;
                            // Metadata may arrive as a JSON string; parse it when possible.
                            if (typeof metadata === "string") {
                                try {
                                    metadata = JSON.parse(metadata);
                                }
                                catch { /* ignore parse errors */ }
                            }
                            if (metadata && typeof metadata === "object") {
                                newMetadata["usage"] = metadata.usage ?? {};
                                newMetadata["cost"] = metadata.cost ?? {};
                                newMetadata["explanation"] = metadata.explanation ?? {};
                            }
                        }
                        evalResults.push({
                            data: evaluation?.data,
                            failure: evaluation?.failure,
                            reason: evaluation?.reason ?? "",
                            runtime: evaluation?.runtime ?? 0,
                            metadata: newMetadata,
                            metrics: Array.isArray(evaluation?.metrics)
                                ? evaluation.metrics.map((m) => ({ id: m.id, value: m.value }))
                                : [],
                        });
                    }
                }
            }
        }
        return { eval_results: evalResults };
    }
    static _handleError(response) {
        if (response.status === 400) {
            throw new Error(`Evaluation failed with a 400 Bad Request. Please check your input data and evaluation configuration. Response: ${JSON.stringify(response.data)}`);
        }
        else if (response.status === 403) {
            throw new InvalidAuthError();
        }
        else {
            throw new Error(`Error in evaluation: ${response.status}, response: ${JSON.stringify(response.data)}`);
        }
    }
}
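/*
 * Illustrative sketch (hypothetical values) of the normalized shape that
 * EvalResponseHandler._parseSuccess returns:
 *
 *   {
 *     eval_results: [{
 *       data: "...",
 *       failure: false,
 *       reason: "explanation of the verdict",
 *       runtime: 42,
 *       metadata: { usage: {}, cost: {}, explanation: {} },
 *       metrics: [{ id: "score", value: 0.9 }],
 *     }],
 *   }
 */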
/**
* Handles responses for evaluation info requests
*/
export class EvalInfoResponseHandler extends ResponseHandler {
    static _parseSuccess(response) {
        const data = response.data;
        if (data.result) {
            return data.result;
        }
        throw new Error(`Failed to get evaluation info: ${JSON.stringify(data)}`);
    }
    static _handleError(response) {
        if (response.status === 400) {
            throw new Error(`Bad request: ${JSON.stringify(response.data)}`);
        }
        if (response.status === 403) {
            throw new InvalidAuthError();
        }
        throw new Error(`Failed to get evaluation info: ${response.status}`);
    }
}
/**
* Client for evaluating LLM test cases
*/
export class Evaluator extends APIKeyAuth {
    constructor(options = {}) {
        // Environment variables take precedence over explicitly passed options.
        const fiApiKey = process.env.FI_API_KEY || options.fiApiKey;
        const fiSecretKey = process.env.FI_SECRET_KEY || options.fiSecretKey;
        const fiBaseUrl = process.env.FI_BASE_URL || options.fiBaseUrl;
        super({ ...options, fiApiKey, fiSecretKey, fiBaseUrl });
        this.evalInfoCache = new Map();
        this.maxWorkers = options.maxWorkers || 8;
    }
    async evaluate(evalTemplates, inputs, options) {
        const { timeout, modelName, customEvalName } = options;
        if (!modelName || modelName.trim() === "") {
            throw new TypeError("'modelName' is a required option and must be a non-empty string.");
        }
        let traceEval = options.traceEval || false;
        let spanId = undefined;
        const extractName = (t) => {
            if (typeof t === 'string') {
                return t;
            }
            if (typeof t === 'object' && t.eval_name) {
                return t.eval_name;
            }
            return undefined;
        };
        const firstTemplate = Array.isArray(evalTemplates) ? evalTemplates[0] : evalTemplates;
        const evalName = extractName(firstTemplate);
        if (!evalName) {
            throw new TypeError("Unsupported 'evalTemplates' argument. Expected an eval template class/object or a name string.");
        }
        // OpenTelemetry: attach the current span so the evaluation can be traced.
        if (traceEval) {
            if (!customEvalName) {
                traceEval = false;
                console.warn("Failed to trace the evaluation. Please set the customEvalName option.");
            }
            else {
                try {
                    // Dynamically import so OTEL is not a hard dependency.
                    const otel = await import('@opentelemetry/api');
                    const { checkCustomEvalConfigExists } = await import('@traceai/fi-core');
                    const currentSpan = otel.trace.getSpan(otel.context.active());
                    if (currentSpan && currentSpan.isRecording()) {
                        const spanContext = currentSpan.spanContext();
                        if (otel.isSpanContextValid(spanContext)) {
                            spanId = spanContext.spanId;
                            // Accessing the resource is not part of the public OTEL API,
                            // but it is available on SDK implementations. This mirrors the Python SDK's approach.
                            const tracerProvider = otel.trace.getTracerProvider();
                            const resource = tracerProvider.resource || currentSpan.resource;
                            let projectName = resource?.attributes['project_name'];
                            if (!projectName) {
                                // Fall back to the standard OTEL service.name when the custom attribute is absent.
                                projectName = resource?.attributes['service.name'];
                            }
                            if (projectName) {
                                const evalTags = [{
                                    custom_eval_name: customEvalName,
                                    eval_name: evalName,
                                    mapping: {},
                                    config: {},
                                }];
                                const customEvalExists = await checkCustomEvalConfigExists(projectName, evalTags);
                                if (customEvalExists) {
                                    traceEval = false;
                                    console.warn("Failed to trace the evaluation. A custom eval configuration with the same name already exists for this project.");
                                }
                            }
                            else {
                                traceEval = false;
                                console.warn("Could not determine project_name from the OpenTelemetry context. " +
                                    "Skipping the check for an existing custom eval configuration.");
                            }
                        }
                    }
                }
                catch (error) {
                    console.warn("OpenTelemetry API not found. Please install '@opentelemetry/api' to enable tracing. " +
                        "Skipping trace for this evaluation.", error);
                    traceEval = false;
                }
            }
        }
        const transformedApiInputs = {};
        if (Array.isArray(inputs)) {
            // Arrays of test-case objects are explicitly disallowed by the spec.
            throw new TypeError("'inputs' must be a dictionary; an array of dictionaries is not supported.");
        }
        // Normalize every value to string[]: "a" becomes ["a"], while ["a", "b"] passes through.
        for (const [key, value] of Object.entries(inputs)) {
            if (Array.isArray(value)) {
                if (!value.every(v => typeof v === "string")) {
                    throw new TypeError(`All values in the array for key '${key}' must be strings.`);
                }
                transformedApiInputs[key] = value;
            }
            else if (typeof value === "string") {
                transformedApiInputs[key] = [value];
            }
            else {
                throw new TypeError(`Invalid input type for key '${key}'. Expected string or string[].`);
            }
        }
        const finalApiPayload = {
            eval_name: evalName,
            inputs: transformedApiInputs,
            model: modelName,
            span_id: spanId,
            custom_eval_name: customEvalName,
            trace_eval: traceEval,
        };
        // Convert the timeout (seconds) to milliseconds for axios; use the higher default (200s) when not provided.
        const timeoutMs = timeout !== undefined ? timeout * 1000 : this.defaultTimeout * 1000;
        try {
            return await this.request({
                method: HttpMethod.POST,
                url: `${this.baseUrl}/${Routes.evaluatev2}`,
                json: finalApiPayload,
                timeout: timeoutMs,
            }, EvalResponseHandler);
        }
        catch (error) {
            console.error("Evaluation failed:", error);
            throw error;
        }
    }
    async _get_eval_info(evalName) {
        // Return a cached entry when available to avoid refetching the template list.
        if (this.evalInfoCache.has(evalName)) {
            return this.evalInfoCache.get(evalName);
        }
        const response = await this.request({
            method: HttpMethod.GET,
            url: `${this.baseUrl}/${Routes.get_eval_templates}`,
        }, EvalInfoResponseHandler);
        const evalInfo = response.find(item => item.name === evalName);
        if (!evalInfo) {
            throw new Error(`Evaluation template with name '${evalName}' not found`);
        }
        this.evalInfoCache.set(evalName, evalInfo);
        return evalInfo;
    }
    async list_evaluations() {
        return this.request({
            method: HttpMethod.GET,
            url: `${this.baseUrl}/${Routes.get_eval_templates}`,
        }, EvalInfoResponseHandler);
    }
}
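/*
 * Illustrative sketch: reusing a single Evaluator instance shares credentials
 * and the template-info cache across calls. The template name, input keys, and
 * model name below are hypothetical:
 *
 *   const evaluator = new Evaluator({ fiApiKey: "...", fiSecretKey: "..." });
 *   const batch = await evaluator.evaluate(
 *       "Factual Accuracy",
 *       { input: ["Q1", "Q2"], output: ["A1", "A2"] },
 *       { modelName: "gpt-4o", timeout: 60 }, // timeout is in seconds
 *   );
 */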
/**
 * Convenience function to run a single evaluation or a batch of evaluations.
 * @param evalTemplates - Evaluation name string (e.g., "Factual Accuracy") or a list of templates.
 * @param inputs - Test case as a dictionary mapping each key to a string or an array of strings.
 * @param options - Evaluation options; `modelName` is required.
 * @returns BatchRunResult containing the evaluation results.
 */
export const evaluate = (evalTemplates, inputs, options) => {
    const { fiApiKey, fiSecretKey, fiBaseUrl, ...restOptions } = options;
    return new Evaluator({ fiApiKey, fiSecretKey, fiBaseUrl }).evaluate(evalTemplates, inputs, restOptions);
};
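/*
 * Example usage (an illustrative sketch; assumes FI_API_KEY and FI_SECRET_KEY
 * are set in the environment, and the input keys and model name are
 * hypothetical):
 *
 *   const result = await evaluate(
 *       "Factual Accuracy",
 *       { input: "What is the capital of France?", output: "Paris" },
 *       { modelName: "gpt-4o" },
 *   );
 *   for (const r of result.eval_results) {
 *       console.log(r.reason, r.metrics);
 *   }
 */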
/**
* Convenience function to fetch information about all available evaluation templates.
* @returns A list of evaluation template information dictionaries.
*/
export const list_evaluations = (options = {}) => {
    const { fiApiKey, fiSecretKey, fiBaseUrl } = options;
    return new Evaluator({ fiApiKey, fiSecretKey, fiBaseUrl }).list_evaluations();
};
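/*
 * Example (illustrative; credentials may also be supplied via the FI_API_KEY,
 * FI_SECRET_KEY, and FI_BASE_URL environment variables):
 *
 *   const templates = await list_evaluations();
 *   console.log(templates.map(t => t.name));
 */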