@gentrace/evals
Version:
Gentrace Evals plugin for Node.JS
123 lines (108 loc) • 3.52 kB
text/typescript
import {
LocalEvaluation,
LocalEvaluationDebug,
LocalEvaluationDebugError,
} from "@gentrace/core";
import OpenAI from "openai";
import { z } from "zod";
import { zodResponseFormat } from "openai/helpers/zod";
import { ChatCompletionMessageParam } from "openai/resources/chat/completions";
const DEFAULT_MODEL = "gpt-4o";
interface EvalOptions {
name: string;
prompt: string;
scoreAs: Record<string, number> | "percentage";
imageUrls?: string[];
}
export namespace evals {
export namespace llm {
export async function base(options: EvalOptions): Promise<LocalEvaluation> {
const openai = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
});
const scoreSchema =
options.scoreAs === "percentage"
? z.number().min(0).max(100)
: z.enum(Object.keys(options.scoreAs) as [string, ...string[]]);
const EvalResponse = z.object({
reasoning: z.string(),
score: scoreSchema,
});
type EvalResponseType = z.infer<typeof EvalResponse>;
let parsedResponse: EvalResponseType | null = null;
let error: LocalEvaluationDebugError | null = null;
const imageUrls: ChatCompletionMessageParam[] = options.imageUrls
? [
{
role: "user",
content: options.imageUrls.map((url) => ({
type: "image_url",
image_url: { url },
})),
},
]
: [];
try {
const completion = await openai.beta.chat.completions.parse({
model: DEFAULT_MODEL,
messages: [
{
role: "system",
content:
"You are a helpful assistant that evaluates content based on given criteria.",
},
{ role: "user", content: options.prompt },
...imageUrls,
],
response_format: zodResponseFormat(
EvalResponse,
"provide_evaluation",
),
});
const message = completion.choices[0]?.message;
if (!message) {
throw new Error("No message returned from the API");
}
if (!message.parsed) {
const refusal = message.refusal || "Unknown refusal reason";
throw new Error(
`OpenAI failed to create a structured response: ${refusal}`,
);
}
parsedResponse = message.parsed as EvalResponseType;
} catch (err) {
error = {
message: err instanceof Error ? err.message : String(err),
};
}
let value: number | null = null;
if (parsedResponse) {
if (options.scoreAs === "percentage") {
value = (parsedResponse.score as number) / 100;
} else {
value =
options.scoreAs[
parsedResponse.score as keyof typeof options.scoreAs
];
}
}
const debug: LocalEvaluationDebug = {
resolvedPrompt: options.prompt,
response: parsedResponse ? JSON.stringify(parsedResponse) : undefined,
finalClassification: parsedResponse?.score.toString(),
processorLogs: [],
logs: [],
error: error,
};
return {
name: options.name,
value: value !== null ? value : 0, // Default to 0 if there was an error
label:
options.scoreAs === "percentage"
? null
: (parsedResponse?.score?.toString() ?? null),
debug: debug,
};
}
}
}