@gentrace/evals
Version:
Gentrace Evals plugin for Node.JS
1 lines • 6.98 kB
Source Map (JSON)
{"version":3,"file":"llm-evaluator.mjs","sources":["../../../../src/llm-evaluator.ts"],"sourcesContent":["import {\n LocalEvaluation,\n LocalEvaluationDebug,\n LocalEvaluationDebugError,\n} from \"@gentrace/core\";\nimport OpenAI from \"openai\";\nimport { z } from \"zod\";\nimport { zodResponseFormat } from \"openai/helpers/zod\";\nimport { ChatCompletionMessageParam } from \"openai/resources/chat/completions\";\n\nconst DEFAULT_MODEL = \"gpt-4o\";\n\ninterface EvalOptions {\n name: string;\n prompt: string;\n scoreAs: Record<string, number> | \"percentage\";\n imageUrls?: string[];\n}\n\nexport namespace evals {\n export namespace llm {\n export async function base(options: EvalOptions): Promise<LocalEvaluation> {\n const openai = new OpenAI({\n apiKey: process.env.OPENAI_API_KEY,\n });\n\n const scoreSchema =\n options.scoreAs === \"percentage\"\n ? z.number().min(0).max(100)\n : z.enum(Object.keys(options.scoreAs) as [string, ...string[]]);\n\n const EvalResponse = z.object({\n reasoning: z.string(),\n score: scoreSchema,\n });\n\n type EvalResponseType = z.infer<typeof EvalResponse>;\n\n let parsedResponse: EvalResponseType | null = null;\n let error: LocalEvaluationDebugError | null = null;\n\n const imageUrls: ChatCompletionMessageParam[] = options.imageUrls\n ? [\n {\n role: \"user\",\n content: options.imageUrls.map((url) => ({\n type: \"image_url\",\n image_url: { url },\n })),\n },\n ]\n : [];\n\n try {\n const completion = await openai.beta.chat.completions.parse({\n model: DEFAULT_MODEL,\n messages: [\n {\n role: \"system\",\n content:\n \"You are a helpful assistant that evaluates content based on given criteria.\",\n },\n { role: \"user\", content: options.prompt },\n ...imageUrls,\n ],\n response_format: zodResponseFormat(\n EvalResponse,\n \"provide_evaluation\",\n ),\n });\n\n const message = completion.choices[0]?.message;\n if (!message) {\n throw new Error(\"No message returned from the API\");\n }\n if (!message.parsed) {\n const refusal = message.refusal || \"Unknown refusal reason\";\n throw new Error(\n `OpenAI failed to create a structured response: ${refusal}`,\n );\n }\n\n parsedResponse = message.parsed as EvalResponseType;\n } catch (err) {\n error = {\n message: err instanceof Error ? err.message : String(err),\n };\n }\n\n let value: number | null = null;\n if (parsedResponse) {\n if (options.scoreAs === \"percentage\") {\n value = (parsedResponse.score as number) / 100;\n } else {\n value =\n options.scoreAs[\n parsedResponse.score as keyof typeof options.scoreAs\n ];\n }\n }\n\n const debug: LocalEvaluationDebug = {\n resolvedPrompt: options.prompt,\n response: parsedResponse ? JSON.stringify(parsedResponse) : undefined,\n finalClassification: parsedResponse?.score.toString(),\n processorLogs: [],\n logs: [],\n error: error,\n };\n\n return {\n name: options.name,\n value: value !== null ? value : 0, // Default to 0 if there was an error\n label:\n options.scoreAs === \"percentage\"\n ? null\n : (parsedResponse?.score?.toString() ?? null),\n debug: debug,\n };\n }\n }\n}\n"],"names":[],"mappings":";;;;;AAUA,MAAM,aAAa,GAAG,QAAQ,CAAC;AASzB,IAAW,MAsGhB;AAtGD,CAAA,UAAiB,KAAK,EAAA;AACpB,IAAA,CAAA,UAAiB,GAAG,EAAA;QAClB,SAAsB,IAAI,CAAC,OAAoB,EAAA;;;AAC7C,gBAAA,MAAM,MAAM,GAAG,IAAI,MAAM,CAAC;AACxB,oBAAA,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,cAAc;AACnC,iBAAA,CAAC,CAAC;AAEH,gBAAA,MAAM,WAAW,GACf,OAAO,CAAC,OAAO,KAAK,YAAY;AAC9B,sBAAE,CAAC,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC;AAC5B,sBAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAA0B,CAAC,CAAC;AAEpE,gBAAA,MAAM,YAAY,GAAG,CAAC,CAAC,MAAM,CAAC;AAC5B,oBAAA,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE;AACrB,oBAAA,KAAK,EAAE,WAAW;AACnB,iBAAA,CAAC,CAAC;gBAIH,IAAI,cAAc,GAA4B,IAAI,CAAC;gBACnD,IAAI,KAAK,GAAqC,IAAI,CAAC;AAEnD,gBAAA,MAAM,SAAS,GAAiC,OAAO,CAAC,SAAS;AAC/D,sBAAE;AACE,wBAAA;AACE,4BAAA,IAAI,EAAE,MAAM;AACZ,4BAAA,OAAO,EAAE,OAAO,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM;AACvC,gCAAA,IAAI,EAAE,WAAW;gCACjB,SAAS,EAAE,EAAE,GAAG,EAAE;AACnB,6BAAA,CAAC,CAAC;AACJ,yBAAA;AACF,qBAAA;sBACD,EAAE,CAAC;gBAEP,IAAI;AACF,oBAAA,MAAM,UAAU,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC;AAC1D,wBAAA,KAAK,EAAE,aAAa;AACpB,wBAAA,QAAQ,EAAE;AACR,4BAAA;AACE,gCAAA,IAAI,EAAE,QAAQ;AACd,gCAAA,OAAO,EACL,6EAA6E;AAChF,6BAAA;4BACD,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,CAAC,MAAM,EAAE;AACzC,4BAAA,GAAG,SAAS;AACb,yBAAA;AACD,wBAAA,eAAe,EAAE,iBAAiB,CAChC,YAAY,EACZ,oBAAoB,CACrB;AACF,qBAAA,CAAC,CAAC;oBAEH,MAAM,OAAO,GAAG,CAAA,EAAA,GAAA,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,MAAE,IAAA,IAAA,EAAA,KAAA,KAAA,CAAA,GAAA,KAAA,CAAA,GAAA,EAAA,CAAA,OAAO,CAAC;oBAC/C,IAAI,CAAC,OAAO,EAAE;AACZ,wBAAA,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC,CAAC;AACrD,qBAAA;AACD,oBAAA,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE;AACnB,wBAAA,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,wBAAwB,CAAC;AAC5D,wBAAA,MAAM,IAAI,KAAK,CACb,kDAAkD,OAAO,CAAA,CAAE,CAC5D,CAAC;AACH,qBAAA;AAED,oBAAA,cAAc,GAAG,OAAO,CAAC,MAA0B,CAAC;AACrD,iBAAA;AAAC,gBAAA,OAAO,GAAG,EAAE;AACZ,oBAAA,KAAK,GAAG;AACN,wBAAA,OAAO,EAAE,GAAG,YAAY,KAAK,GAAG,GAAG,CAAC,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC;qBAC1D,CAAC;AACH,iBAAA;gBAED,IAAI,KAAK,GAAkB,IAAI,CAAC;AAChC,gBAAA,IAAI,cAAc,EAAE;AAClB,oBAAA,IAAI,OAAO,CAAC,OAAO,KAAK,YAAY,EAAE;AACpC,wBAAA,KAAK,GAAI,cAAc,CAAC,KAAgB,GAAG,GAAG,CAAC;AAChD,qBAAA;AAAM,yBAAA;wBACL,KAAK;AACH,4BAAA,OAAO,CAAC,OAAO,CACb,cAAc,CAAC,KAAqC,CACrD,CAAC;AACL,qBAAA;AACF,iBAAA;AAED,gBAAA,MAAM,KAAK,GAAyB;oBAClC,cAAc,EAAE,OAAO,CAAC,MAAM;AAC9B,oBAAA,QAAQ,EAAE,cAAc,GAAG,IAAI,CAAC,SAAS,CAAC,cAAc,CAAC,GAAG,SAAS;oBACrE,mBAAmB,EAAE,cAAc,KAAA,IAAA,IAAd,cAAc,KAAA,KAAA,CAAA,GAAA,KAAA,CAAA,GAAd,cAAc,CAAE,KAAK,CAAC,QAAQ,EAAE;AACrD,oBAAA,aAAa,EAAE,EAAE;AACjB,oBAAA,IAAI,EAAE,EAAE;AACR,oBAAA,KAAK,EAAE,KAAK;iBACb,CAAC;gBAEF,OAAO;oBACL,IAAI,EAAE,OAAO,CAAC,IAAI;oBAClB,KAAK,EAAE,KAAK,KAAK,IAAI,GAAG,KAAK,GAAG,CAAC;AACjC,oBAAA,KAAK,EACH,OAAO,CAAC,OAAO,KAAK,YAAY;AAC9B,0BAAE,IAAI;AACN,2BAAG,CAAA,EAAA,GAAA,CAAA,EAAA,GAAA,cAAc,aAAd,cAAc,KAAA,KAAA,CAAA,GAAA,KAAA,CAAA,GAAd,cAAc,CAAE,KAAK,MAAE,IAAA,IAAA,EAAA,KAAA,KAAA,CAAA,GAAA,KAAA,CAAA,GAAA,EAAA,CAAA,QAAQ,EAAE,MAAA,IAAA,IAAA,EAAA,KAAA,KAAA,CAAA,GAAA,EAAA,GAAI,IAAI,CAAC;AACjD,oBAAA,KAAK,EAAE,KAAK;iBACb,CAAC;;AACH,SAAA;AAlGqB,QAAA,GAAA,CAAA,IAAI,OAkGzB,CAAA;AACH,KAAC,EApGgB,KAAG,CAAA,GAAA,KAAH,SAAG,GAoGnB,EAAA,CAAA,CAAA,CAAA;AACH,CAAC,EAtGgB,KAAK,KAAL,KAAK,GAsGrB,EAAA,CAAA,CAAA;;;;"}