@gentrace/evals
Version:
Gentrace Evals plugin for Node.js
98 lines (95 loc) • 4.39 kB
JavaScript
;
var tslib_es6 = require('../../../node_modules/.pnpm/@rollup_plugin-typescript@11.1.6_rollup@3.29.4_tslib@2.6.2_typescript@5.1.6/node_modules/tslib/tslib.es6.js');
var OpenAI = require('openai');
var zod = require('zod');
var zod$1 = require('openai/helpers/zod');
// Model name passed as `model` to the chat-completion request issued by `base`.
const DEFAULT_MODEL = "gpt-4o";
// Declare the `evals` export up front; the namespace IIFE further down assigns the actual object.
exports.evals = void 0;
(function (evals) {
(function (llm) {
/**
 * Asks an OpenAI chat model to grade content and converts the structured
 * reply into an evaluation result.
 *
 * @param {object} options
 * @param {string} options.name - Copied verbatim into the result's `name`.
 * @param {string} options.prompt - Sent as the user message and echoed in
 *   `debug.resolvedPrompt`.
 * @param {"percentage"|Object} options.scoreAs - Either the literal
 *   `"percentage"` (the model returns a number in 0..100, reported as
 *   `score / 100`) or an object whose keys are the allowed labels; the value
 *   stored under the chosen key becomes the result's `value`.
 * @param {string[]} [options.imageUrls] - Optional image URLs, sent as one
 *   extra user message made of `image_url` parts. Omitted or empty means no
 *   extra message is sent.
 * @returns {Promise<{name: *, value: *, label: (string|null), debug: object}>}
 *   Errors raised by the API call or while reading its reply are not
 *   re-thrown: they are reported as `debug.error = { message }` and `value`
 *   falls back to 0. Errors raised before the request is made (client or
 *   schema construction) still reject the returned promise.
 */
function base(options) {
    return tslib_es6.__awaiter(this, void 0, void 0, function* () {
        const openai = new OpenAI({
            apiKey: process.env.OPENAI_API_KEY,
        });
        const isPercentage = options.scoreAs === "percentage";
        const scoreSchema = isPercentage
            ? zod.z.number().min(0).max(100)
            : zod.z.enum(Object.keys(options.scoreAs));
        const EvalResponse = zod.z.object({
            reasoning: zod.z.string(),
            score: scoreSchema,
        });
        // An empty array is truthy, so test the length explicitly: otherwise
        // `imageUrls: []` would add a user message whose content is an empty
        // list of parts.
        const hasImages = Boolean(options.imageUrls) && options.imageUrls.length > 0;
        const imageMessages = hasImages
            ? [
                {
                    role: "user",
                    content: options.imageUrls.map((url) => ({
                        type: "image_url",
                        image_url: { url },
                    })),
                },
            ]
            : [];
        let parsedResponse = null;
        let error = null;
        try {
            const completion = yield openai.beta.chat.completions.parse({
                model: DEFAULT_MODEL,
                messages: [
                    {
                        role: "system",
                        content: "You are a helpful assistant that evaluates content based on given criteria.",
                    },
                    { role: "user", content: options.prompt },
                    ...imageMessages,
                ],
                response_format: zod$1.zodResponseFormat(EvalResponse, "provide_evaluation"),
            });
            const firstChoice = completion.choices[0];
            const message = firstChoice === null || firstChoice === undefined
                ? undefined
                : firstChoice.message;
            if (!message) {
                throw new Error("No message returned from the API");
            }
            if (!message.parsed) {
                const refusal = message.refusal || "Unknown refusal reason";
                throw new Error(`OpenAI failed to create a structured response: ${refusal}`);
            }
            parsedResponse = message.parsed;
        }
        catch (err) {
            // Deliberate best-effort: the failure is surfaced through
            // `debug.error` instead of rejecting the returned promise.
            error = {
                message: err instanceof Error ? err.message : String(err),
            };
        }
        let value = null;
        if (parsedResponse) {
            value = isPercentage
                ? parsedResponse.score / 100
                : options.scoreAs[parsedResponse.score];
        }
        // The label is only meaningful for enum-style scoring and only when a
        // score was actually obtained.
        const hasScore = parsedResponse !== null &&
            parsedResponse.score !== null &&
            parsedResponse.score !== undefined;
        const label = !isPercentage && hasScore
            ? parsedResponse.score.toString()
            : null;
        const debug = {
            resolvedPrompt: options.prompt,
            response: parsedResponse ? JSON.stringify(parsedResponse) : undefined,
            finalClassification: parsedResponse
                ? parsedResponse.score.toString()
                : undefined,
            processorLogs: [],
            logs: [],
            error: error,
        };
        return {
            name: options.name,
            value: value !== null ? value : 0,
            label: label,
            debug: debug,
        };
    });
}
llm.base = base;
})(evals.llm || (evals.llm = {}));
})(exports.evals || (exports.evals = {}));
//# sourceMappingURL=llm-evaluator.js.map