UNPKG

@gentrace/evals

Version:
103 lines (102 loc) 4.83 kB
import OpenAI from "openai";
import { z } from "zod";
import { zodResponseFormat } from "openai/helpers/zod";

// Generator-driven async helper: runs `generator` to completion, resolving each
// yielded value as a promise and feeding the settled result (or rejection) back in.
// Reuses an existing `this.__awaiter` when one is available.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) {
        return value instanceof P ? value : new P(function (resolve) { resolve(value); });
    }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) {
            try { step(generator.next(value)); } catch (e) { reject(e); }
        }
        function rejected(value) {
            try { step(generator["throw"](value)); } catch (e) { reject(e); }
        }
        function step(result) {
            result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected);
        }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};

// Model used for every evaluation request issued by `evals.llm.base`.
const DEFAULT_MODEL = "gpt-4o";

export var evals;
(function (evals) {
    let llm;
    (function (llm) {
        /**
         * Asks an OpenAI chat model to evaluate a prompt and returns a normalized score.
         *
         * The model is required to answer with a structured `{ reasoning, score }` object.
         * When `options.scoreAs` is the string "percentage", `score` must be a number in
         * [0, 100] and the returned `value` is `score / 100`. Otherwise `options.scoreAs`
         * is treated as an object whose keys are the allowed labels; the returned `value`
         * is the entry stored under the label the model picked.
         *
         * Failures raised while requesting or reading the completion (API errors, a missing
         * message, a refusal / unparsed response) are not thrown: they are captured in
         * `debug.error` and the result falls back to `value: 0`. Errors raised before the
         * request (constructing the client, building the schema) still reject the promise.
         *
         * @param {object} options
         * @param {string} options.name - Name copied verbatim into the result.
         * @param {string} options.prompt - Evaluation prompt sent as the user message.
         * @param {"percentage"|object} options.scoreAs - Scoring mode (see above).
         * @param {string[]} [options.imageUrls] - Image URLs sent as an additional user
         *   message. Omitted or empty means no image message is sent.
         * @returns {Promise<{name: *, value: *, label: (string|null), debug: object}>}
         */
        function base(options) {
            return __awaiter(this, void 0, void 0, function* () {
                const openai = new OpenAI({
                    apiKey: process.env.OPENAI_API_KEY,
                });

                const isPercentage = options.scoreAs === "percentage";
                const scoreSchema = isPercentage
                    ? z.number().min(0).max(100)
                    : z.enum(Object.keys(options.scoreAs));
                const EvalResponse = z.object({
                    reasoning: z.string(),
                    score: scoreSchema,
                });

                // Only attach an image message when there is at least one URL: an empty
                // array is truthy, and a message with `content: []` would make the whole
                // request fail instead of simply evaluating the text prompt.
                const hasImages = Boolean(options.imageUrls) && options.imageUrls.length > 0;
                const imageMessages = hasImages
                    ? [
                        {
                            role: "user",
                            content: options.imageUrls.map((url) => ({
                                type: "image_url",
                                image_url: { url },
                            })),
                        },
                    ]
                    : [];

                let parsedResponse = null;
                let error = null;
                try {
                    const completion = yield openai.beta.chat.completions.parse({
                        model: DEFAULT_MODEL,
                        messages: [
                            {
                                role: "system",
                                content: "You are a helpful assistant that evaluates content based on given criteria.",
                            },
                            { role: "user", content: options.prompt },
                            ...imageMessages,
                        ],
                        response_format: zodResponseFormat(EvalResponse, "provide_evaluation"),
                    });

                    const firstChoice = completion.choices[0];
                    const message = firstChoice == null ? undefined : firstChoice.message;
                    if (!message) {
                        throw new Error("No message returned from the API");
                    }
                    if (!message.parsed) {
                        const refusal = message.refusal || "Unknown refusal reason";
                        throw new Error(`OpenAI failed to create a structured response: ${refusal}`);
                    }
                    parsedResponse = message.parsed;
                }
                catch (err) {
                    // Deliberate best-effort: report the failure through `debug.error`
                    // rather than rejecting, so callers always receive a result object.
                    error = {
                        message: err instanceof Error ? err.message : String(err),
                    };
                }

                // Percentages are scaled to [0, 1]; labels are mapped through `scoreAs`.
                let value = null;
                if (parsedResponse) {
                    value = isPercentage
                        ? parsedResponse.score / 100
                        : options.scoreAs[parsedResponse.score];
                }

                const debug = {
                    resolvedPrompt: options.prompt,
                    response: parsedResponse ? JSON.stringify(parsedResponse) : undefined,
                    finalClassification: parsedResponse == null
                        ? undefined
                        : parsedResponse.score.toString(),
                    processorLogs: [],
                    logs: [],
                    error: error,
                };

                // A label is only reported for enum-style scoring, and only when a score exists.
                let label = null;
                if (!isPercentage && parsedResponse != null && parsedResponse.score != null) {
                    label = parsedResponse.score.toString();
                }

                return {
                    name: options.name,
                    value: value !== null ? value : 0,
                    label: label,
                    debug: debug,
                };
            });
        }
        llm.base = base;
    })(llm = evals.llm || (evals.llm = {}));
})(evals || (evals = {}));