UNPKG

@gentrace/evals

Version:
98 lines (95 loc) 4.39 kB
'use strict';

var tslib_es6 = require('../../../node_modules/.pnpm/@rollup_plugin-typescript@11.1.6_rollup@3.29.4_tslib@2.6.2_typescript@5.1.6/node_modules/tslib/tslib.es6.js');
var OpenAI = require('openai');
var zod = require('zod');
var zod$1 = require('openai/helpers/zod');

// Model name sent with every evaluation request made by `evals.llm.base`.
const DEFAULT_MODEL = "gpt-4o";

exports.evals = void 0;
(function (evals) {
    (function (llm) {
        /**
         * Asks an OpenAI chat model to evaluate `options.prompt` and converts the
         * structured answer (`reasoning` + `score`) into an evaluation result.
         *
         * Scoring modes:
         *  - `scoreAs === "percentage"`: the model must answer with a number in
         *    [0, 100]; the returned `value` is that number divided by 100 and
         *    `label` is `null`.
         *  - otherwise `scoreAs` is treated as an object: the model must answer
         *    with one of its keys; the returned `value` is `scoreAs[key]` and
         *    `label` is the chosen key.
         *
         * Failures raised while requesting or unpacking the completion are NOT
         * thrown: they are recorded in `debug.error` and `value` falls back to 0.
         * Building the client and the score schema happens before that guarded
         * section, so failures there still reject the returned promise.
         *
         * @param {object} options
         * @param {string} options.name - Copied verbatim into the result's `name`.
         * @param {string} options.prompt - Sent as the user message.
         * @param {"percentage"|object} options.scoreAs - Scoring mode (see above).
         *   Presumably a label -> number map in the object case; verify against
         *   the package's type declarations.
         * @param {string[]} [options.imageUrls] - Optional image URLs, sent as one
         *   extra user message made of `image_url` parts. Omitted, `null`, or
         *   empty means no image message is sent.
         * @returns {Promise<{name: *, value: *, label: (string|null), debug: object}>}
         */
        function base(options) {
            return tslib_es6.__awaiter(this, void 0, void 0, function* () {
                const openai = new OpenAI({
                    apiKey: process.env.OPENAI_API_KEY,
                });

                const isPercentage = options.scoreAs === "percentage";
                const scoreSchema = isPercentage
                    ? zod.z.number().min(0).max(100)
                    : zod.z.enum(Object.keys(options.scoreAs));
                const EvalResponse = zod.z.object({
                    reasoning: zod.z.string(),
                    score: scoreSchema,
                });

                let parsedResponse = null;
                let error = null;

                // Only attach an image message when there is at least one URL.
                // An empty array is truthy, and sending it would add a user
                // message with `content: []` to an otherwise text-only request.
                const hasImages = Boolean(options.imageUrls) && options.imageUrls.length !== 0;
                const imageMessages = hasImages
                    ? [
                        {
                            role: "user",
                            content: options.imageUrls.map((url) => ({
                                type: "image_url",
                                image_url: { url },
                            })),
                        },
                    ]
                    : [];

                try {
                    const completion = yield openai.beta.chat.completions.parse({
                        model: DEFAULT_MODEL,
                        messages: [
                            {
                                role: "system",
                                content: "You are a helpful assistant that evaluates content based on given criteria.",
                            },
                            { role: "user", content: options.prompt },
                            ...imageMessages,
                        ],
                        response_format: zod$1.zodResponseFormat(EvalResponse, "provide_evaluation"),
                    });

                    const firstChoice = completion.choices[0];
                    const message = firstChoice == null ? undefined : firstChoice.message;
                    if (!message) {
                        throw new Error("No message returned from the API");
                    }
                    if (!message.parsed) {
                        const refusal = message.refusal || "Unknown refusal reason";
                        throw new Error(`OpenAI failed to create a structured response: ${refusal}`);
                    }
                    parsedResponse = message.parsed;
                }
                catch (err) {
                    // Deliberate best-effort: report the failure through the
                    // debug payload instead of rejecting the evaluation.
                    error = {
                        message: err instanceof Error ? err.message : String(err),
                    };
                }

                let value = null;
                if (parsedResponse) {
                    value = isPercentage
                        ? parsedResponse.score / 100
                        : options.scoreAs[parsedResponse.score];
                }

                // The label is only meaningful for enum-style scoring.
                let label = null;
                if (!isPercentage && parsedResponse && parsedResponse.score != null) {
                    label = parsedResponse.score.toString();
                }

                const debug = {
                    resolvedPrompt: options.prompt,
                    response: parsedResponse ? JSON.stringify(parsedResponse) : undefined,
                    finalClassification: parsedResponse ? parsedResponse.score.toString() : undefined,
                    processorLogs: [],
                    logs: [],
                    error: error,
                };

                return {
                    name: options.name,
                    // No parsed answer (request failed or was refused) scores 0.
                    value: value !== null ? value : 0,
                    label: label,
                    debug: debug,
                };
            });
        }
        llm.base = base;
    })(evals.llm || (evals.llm = {}));
})(exports.evals || (exports.evals = {}));
//# sourceMappingURL=llm-evaluator.js.map