@gentrace/evals
Version:
Gentrace Evals plugin for Node.js
103 lines (102 loc) • 4.83 kB
JavaScript
/**
 * Down-level async helper: runs a generator function to completion and exposes
 * the outcome as a promise. Each yielded value is awaited before the generator
 * is resumed; a rejected yielded promise is thrown back into the generator.
 *
 * An existing `this.__awaiter` is reused when one is present.
 *
 * @param {*} thisArg - `this` value the generator function is invoked with.
 * @param {Array|undefined} _arguments - Arguments for the generator function (defaults to none).
 * @param {Function|undefined} P - Promise constructor to use; falls back to the global `Promise`.
 * @param {Function} generator - Generator function whose body is the "async" code.
 * @returns {Promise<*>} Resolves with the generator's return value, rejects with any error it throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Pass through values that are already P promises; wrap everything else.
    const adopt = (value) => {
        if (value instanceof P) {
            return value;
        }
        return new P((resolve) => {
            resolve(value);
        });
    };
    if (!P) {
        P = Promise;
    }
    return new P((resolve, reject) => {
        // Resume the generator with a settled value; synchronous throws reject the outer promise.
        const fulfilled = (value) => {
            try {
                step(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        // Re-raise a rejection inside the generator so its own try/catch can handle it.
        const rejected = (value) => {
            try {
                step(generator.throw(value));
            }
            catch (e) {
                reject(e);
            }
        };
        // Either finish with the return value or wait on the next yielded value.
        const step = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            adopt(result.value).then(fulfilled, rejected);
        };
        generator = generator.apply(thisArg, _arguments || []);
        step(generator.next());
    });
};
import OpenAI from "openai";
import { z } from "zod";
import { zodResponseFormat } from "openai/helpers/zod";
// Model name sent as `model` in the chat completion request made by `evals.llm.base`.
const DEFAULT_MODEL = "gpt-4o";
/**
 * Evaluation helpers. `evals.llm.base` asks an OpenAI chat model to grade a
 * prompt (optionally with images) and returns a normalized evaluation result.
 */
export var evals;
(function (evals) {
    let llm;
    (function (llm) {
        /**
         * Builds the schema for the `score` field of the model's structured reply.
         *
         * @param {"percentage"|Object} scoreAs - `"percentage"` for a 0-100 number,
         *   otherwise an object whose keys are the allowed labels.
         * @returns {*} A zod schema for the score.
         */
        function buildScoreSchema(scoreAs) {
            if (scoreAs === "percentage") {
                return z.number().min(0).max(100);
            }
            return z.enum(Object.keys(scoreAs));
        }
        /**
         * Turns a list of image URLs into the extra chat messages to send.
         *
         * @param {string[]|undefined} imageUrls - Image URLs to attach, if any.
         * @returns {Array} A single user message carrying one `image_url` part per
         *   URL, or an empty array when there are no URLs.
         */
        function buildImageMessages(imageUrls) {
            // An empty list must not produce a message: it would carry an empty
            // `content` array, which the chat completions endpoint is expected to reject.
            if (!imageUrls || imageUrls.length === 0) {
                return [];
            }
            return [
                {
                    role: "user",
                    content: imageUrls.map((url) => ({
                        type: "image_url",
                        image_url: { url },
                    })),
                },
            ];
        }
        /**
         * Runs a single LLM-graded evaluation.
         *
         * Failures of the completion request itself (API errors, missing message,
         * refusal / unparsed reply) are not thrown; they are reported in
         * `debug.error` and the result's `value` falls back to 0. Errors raised
         * while constructing the client or the schema still reject the promise.
         *
         * @param {Object} options
         * @param {string} options.name - Name copied onto the result.
         * @param {string} options.prompt - Prompt sent as the user message.
         * @param {"percentage"|Object} options.scoreAs - `"percentage"` (score 0-100,
         *   returned divided by 100) or a map from label to numeric value.
         * @param {string[]} [options.imageUrls] - Optional image URLs sent as an
         *   additional user message.
         * @returns {Promise<{name: string, value: number, label: (string|null), debug: Object}>}
         */
        function base(options) {
            return __awaiter(this, void 0, void 0, function* () {
                const openai = new OpenAI({
                    apiKey: process.env.OPENAI_API_KEY,
                });
                const isPercentage = options.scoreAs === "percentage";
                const scoreSchema = buildScoreSchema(options.scoreAs);
                const EvalResponse = z.object({
                    reasoning: z.string(),
                    score: scoreSchema,
                });
                const imageMessages = buildImageMessages(options.imageUrls);
                let parsedResponse = null;
                let error = null;
                try {
                    const completion = yield openai.beta.chat.completions.parse({
                        model: DEFAULT_MODEL,
                        messages: [
                            {
                                role: "system",
                                content: "You are a helpful assistant that evaluates content based on given criteria.",
                            },
                            { role: "user", content: options.prompt },
                            ...imageMessages,
                        ],
                        response_format: zodResponseFormat(EvalResponse, "provide_evaluation"),
                    });
                    const firstChoice = completion.choices[0];
                    const message = firstChoice == null ? undefined : firstChoice.message;
                    if (!message) {
                        throw new Error("No message returned from the API");
                    }
                    if (!message.parsed) {
                        const refusal = message.refusal || "Unknown refusal reason";
                        throw new Error(`OpenAI failed to create a structured response: ${refusal}`);
                    }
                    parsedResponse = message.parsed;
                }
                catch (err) {
                    // Request failures are surfaced through `debug.error` instead of rejecting.
                    error = {
                        message: err instanceof Error ? err.message : String(err),
                    };
                }
                let value = null;
                if (parsedResponse) {
                    // Percentages are normalized to 0-1; labels are mapped through `scoreAs`.
                    value = isPercentage
                        ? parsedResponse.score / 100
                        : options.scoreAs[parsedResponse.score];
                }
                const finalClassification = parsedResponse
                    ? parsedResponse.score.toString()
                    : undefined;
                const debug = {
                    resolvedPrompt: options.prompt,
                    response: parsedResponse ? JSON.stringify(parsedResponse) : undefined,
                    finalClassification: finalClassification,
                    processorLogs: [],
                    logs: [],
                    error: error,
                };
                return {
                    name: options.name,
                    value: value !== null ? value : 0,
                    // Only label-based scoring carries a label.
                    label: (isPercentage || finalClassification == null) ? null : finalClassification,
                    debug: debug,
                };
            });
        }
        llm.base = base;
    })(llm = evals.llm || (evals.llm = {}));
})(evals || (evals = {}));