UNPKG

evalite

Version:

Test your LLM-powered apps with a TypeScript-native, Vitest-based eval runner. No API key required.

269 lines 10.6 kB
import { fastifyStatic } from "@fastify/static";
import { fastifyWebsocket } from "@fastify/websocket";
import fastify from "fastify";
import path from "path";
import { fileURLToPath } from "url";
import {
  getAverageScoresFromResults,
  getEvalByName,
  getEvals,
  getEvalsAverageScores,
  getHistoricalEvalsWithScoresByName,
  getMostRecentRun,
  getPreviousCompletedEval,
  getResults,
  getScores,
  getTraces,
} from "./db.js";
import { average } from "./utils.js";

/** Minimum interval, in milliseconds, between two websocket broadcasts. */
const THROTTLE_TIME = 100;

/**
 * Groups `items` into a Map keyed by `keyOf(item)`, preserving the original
 * order of the items inside each group.
 *
 * @param {Array<object>} items
 * @param {(item: object) => unknown} keyOf
 * @returns {Map<unknown, Array<object>>}
 */
const groupBy = (items, keyOf) => {
  const groups = new Map();
  for (const item of items) {
    const key = keyOf(item);
    const group = groups.get(key);
    if (group) {
      group.push(item);
    } else {
      groups.set(key, [item]);
    }
  }
  return groups;
};

/**
 * Returns the results that belong to `evalId`, each extended with a `scores`
 * array holding the scores recorded for that result.
 *
 * @param {Array<object>} results
 * @param {number} evalId
 * @param {Map<unknown, Array<object>>} scoresByResultId
 * @returns {Array<object>}
 */
const resultsWithScores = (results, evalId, scoresByResultId) =>
  results
    .filter((r) => r.eval_id === evalId)
    .map((r) => ({
      ...r,
      scores: scoresByResultId.get(r.id) ?? [],
    }));

/**
 * Builds an `attachment` Content-Disposition header value for `filename`.
 *
 * Names made only of printable ASCII (without `"` or `\`) yield the plain
 * `filename="..."` form. Anything else gets a sanitised ASCII fallback plus
 * an RFC 5987 `filename*` parameter, because raw quotes would corrupt the
 * header and non-latin1 characters are rejected by Node as header values.
 *
 * @param {string} filename
 * @returns {string}
 */
const attachmentDisposition = (filename) => {
  const asciiFallback = filename.replace(/[^\x20-\x7e]|["\\]/g, "_");
  if (asciiFallback === filename) {
    return `attachment; filename="${filename}"`;
  }
  // encodeURIComponent leaves ' ( ) * untouched, but RFC 5987 requires them
  // to be percent-encoded inside an ext-value.
  const encoded = encodeURIComponent(filename).replace(
    /['()*]/g,
    (c) => `%${c.charCodeAt(0).toString(16).toUpperCase()}`,
  );
  return `attachment; filename="${asciiFallback}"; filename*=UTF-8''${encoded}`;
};

/**
 * Registers the `/api/socket` websocket route on `server` and returns an
 * accessor pair for the server state that is pushed to connected clients.
 *
 * @param {object} server - Fastify instance (the websocket plugin is expected
 *   to be registered on it by the caller).
 * @returns {{ updateState: (newState: object) => void, getState: () => object }}
 */
export const handleWebsockets = (server) => {
  // One send-callback per open socket, keyed by the id of the upgrade request.
  const websocketListeners = new Map();
  let currentState = {
    type: "idle",
  };
  let timeout;

  server.register(async (fastify) => {
    fastify.get("/api/socket", { websocket: true }, (socket, req) => {
      websocketListeners.set(req.id, (event) => {
        socket.send(JSON.stringify(event));
      });
      socket.on("close", () => {
        websocketListeners.delete(req.id);
      });
    });
  });

  return {
    updateState: (newState) => {
      currentState = newState;
      // Trailing-edge throttle: while a broadcast is already scheduled, only
      // remember the newest state. Re-arming the timer on every call (a
      // debounce) would starve listeners during a sustained burst of updates.
      if (timeout !== undefined) {
        return;
      }
      timeout = setTimeout(() => {
        timeout = undefined;
        const stateToSend = currentState;
        websocketListeners.forEach((listener) => {
          listener(stateToSend);
        });
      }, THROTTLE_TIME);
    },
    getState: () => currentState,
  };
};

/**
 * Creates the HTTP server that serves the bundled UI and the JSON API.
 *
 * @param {{ db: object }} opts - `opts.db` is handed to every query helper
 *   imported from `./db.js`.
 * @returns {{ updateState: (newState: object) => void, start: (port: number) => void }}
 */
export const createServer = (opts) => {
  // The built UI lives in the `ui` directory next to this module.
  const UI_ROOT = path.join(path.dirname(fileURLToPath(import.meta.url)), "./ui");
  const server = fastify();

  server.register(fastifyWebsocket);
  server.register(fastifyStatic, {
    root: path.join(UI_ROOT),
  });

  // Unknown routes fall back to index.html with a 200 status.
  server.setNotFoundHandler(async (req, reply) => {
    return reply.status(200).sendFile("index.html");
  });

  // Add CORS headers
  server.addHook("onSend", (req, reply, payload, done) => {
    reply.header("access-control-allow-origin", "*");
    done(null, payload);
  });

  const websockets = handleWebsockets(server);

  server.get("/api/server-state", async (req, reply) => {
    return reply.code(200).send(websockets.getState());
  });

  server.get("/api/menu-items", async (req, reply) => {
    const latestFullRun = getMostRecentRun(opts.db, "full");
    if (!latestFullRun) {
      return reply.code(200).send({
        evals: [],
        prevScore: undefined,
        score: 0,
        evalStatus: "success",
      });
    }

    let latestPartialRun = getMostRecentRun(opts.db, "partial");
    /**
     * Ignore latestPartialRun if the latestFullRun is more
     * up to date
     */
    if (
      latestPartialRun &&
      new Date(latestPartialRun.created_at).getTime() <
        new Date(latestFullRun.created_at).getTime()
    ) {
      latestPartialRun = undefined;
    }

    const runIds = [latestFullRun.id, latestPartialRun?.id].filter(
      (id) => typeof id === "number",
    );
    const allEvals = getEvals(opts.db, runIds, ["fail", "success", "running"]).map((e) => ({
      ...e,
      prevEval: getPreviousCompletedEval(opts.db, e.name, e.created_at),
    }));

    const evalsAverageScores = getEvalsAverageScores(
      opts.db,
      allEvals.flatMap((e) => (e.prevEval ? [e.id, e.prevEval.id] : [e.id])),
    );

    // Index averages by eval id once; the first row wins for a given id,
    // matching what a linear `find` would return.
    const averageByEvalId = new Map();
    for (const row of evalsAverageScores) {
      if (!averageByEvalId.has(row.eval_id)) {
        averageByEvalId.set(row.eval_id, row.average);
      }
    }

    const createEvalMenuItem = (e) => ({
      filepath: e.filepath,
      name: e.name,
      score: averageByEvalId.get(e.id) ?? 0,
      prevScore: averageByEvalId.get(e.prevEval?.id),
      evalStatus: e.status,
    });

    let lastFullRunEvals = allEvals.filter((e) => e.run_id === latestFullRun.id);
    if (latestPartialRun) {
      const partialEvals = allEvals.filter((e) => e.run_id === latestPartialRun.id);
      const partialNames = new Set(partialEvals.map((p) => p.name));
      // Evals re-run in the partial run replace their counterparts (matched
      // by name) from the full run.
      lastFullRunEvals = [
        ...partialEvals,
        ...lastFullRunEvals.filter((e) => !partialNames.has(e.name)),
      ];
    }

    const menuItems = lastFullRunEvals.map(createEvalMenuItem).sort((a, b) => {
      return a.name.localeCompare(b.name);
    });

    return reply.code(200).send({
      evals: menuItems,
      score: average(menuItems, (e) => e.score),
      // Evals without a previous score contribute their current score.
      prevScore: average(menuItems, (e) => e.prevScore ?? e.score),
      evalStatus: menuItems.some((e) => e.evalStatus === "fail") ? "fail" : "success",
    });
  });

  server.route({
    method: "GET",
    url: "/api/eval",
    schema: {
      querystring: {
        type: "object",
        properties: {
          name: { type: "string" },
          timestamp: { type: "string" },
        },
        required: ["name"],
      },
    },
    handler: async (req, res) => {
      const name = req.query.name;
      const evaluation = getEvalByName(opts.db, {
        name,
        timestamp: req.query.timestamp,
      });
      if (!evaluation) {
        return res.code(404).send();
      }

      const prevEvaluation = getPreviousCompletedEval(opts.db, name, evaluation.created_at);
      const results = getResults(
        opts.db,
        [evaluation.id, prevEvaluation?.id].filter((i) => typeof i === "number"),
      );
      const scores = getScores(
        opts.db,
        results.map((r) => r.id),
      );
      const scoresByResultId = groupBy(scores, (s) => s.result_id);
      const history = getHistoricalEvalsWithScoresByName(opts.db, name);

      return res.code(200).send({
        history: history.map((h) => ({
          score: h.average_score,
          date: h.created_at,
        })),
        evaluation: {
          ...evaluation,
          results: resultsWithScores(results, evaluation.id, scoresByResultId),
        },
        prevEvaluation: prevEvaluation
          ? {
              ...prevEvaluation,
              results: resultsWithScores(results, prevEvaluation.id, scoresByResultId),
            }
          : undefined,
      });
    },
  });

  server.route({
    method: "GET",
    url: "/api/eval/result",
    schema: {
      querystring: {
        type: "object",
        properties: {
          name: { type: "string" },
          index: { type: "string" },
          timestamp: { type: "string" },
        },
        required: ["name", "index"],
      },
    },
    handler: async (req, res) => {
      const evaluation = getEvalByName(opts.db, {
        name: req.query.name,
        timestamp: req.query.timestamp,
        statuses: ["fail", "success"],
      });
      if (!evaluation) {
        return res.code(404).send();
      }

      const prevEvaluation = getPreviousCompletedEval(
        opts.db,
        req.query.name,
        evaluation.created_at,
      );
      const results = getResults(
        opts.db,
        [evaluation.id, prevEvaluation?.id].filter((i) => typeof i === "number"),
      );

      // `index` is the position of the result within its evaluation; an
      // index that selects nothing yields a 404.
      const resultIndex = Number(req.query.index);
      const thisEvaluationResults = results.filter((r) => r.eval_id === evaluation.id);
      const thisResult = thisEvaluationResults[resultIndex];
      if (!thisResult) {
        return res.code(404).send();
      }

      const prevEvaluationResults = results.filter((r) => r.eval_id === prevEvaluation?.id);
      const resultIds = results.map((r) => r.id);
      const averageScores = getAverageScoresFromResults(opts.db, resultIds);
      const scores = getScores(opts.db, resultIds);
      const traces = getTraces(opts.db, resultIds);

      const averageFor = (resultId) =>
        averageScores.find((s) => s.result_id === resultId)?.average ?? 0;
      const scoresFor = (resultId) => scores.filter((s) => s.result_id === resultId);

      const result = {
        ...thisResult,
        score: averageFor(thisResult.id),
        scores: scoresFor(thisResult.id),
        traces: traces.filter((t) => t.result_id === thisResult.id),
      };

      // The previous result is the one at the same position in the previous
      // evaluation, if any.
      const prevResultInDb = prevEvaluationResults[resultIndex];
      const prevResult = prevResultInDb
        ? {
            ...prevResultInDb,
            score: averageFor(prevResultInDb.id),
            scores: scoresFor(prevResultInDb.id),
          }
        : undefined;

      return res.code(200).send({
        result,
        prevResult,
        evaluation,
      });
    },
  });

  // NOTE(security): this route serves whatever path the caller supplies, and
  // every response carries `access-control-allow-origin: *`, so any web page
  // able to reach this port can read files the process can read. Consider
  // restricting `path` to known roots and/or narrowing the CORS policy.
  server.route({
    method: "GET",
    url: "/api/file",
    schema: {
      querystring: {
        type: "object",
        properties: {
          path: { type: "string" },
          download: { type: "boolean" },
        },
        required: ["path"],
      },
    },
    handler: async (req, res) => {
      const filePath = req.query.path;
      const parsed = path.parse(filePath);
      if (req.query.download) {
        return res
          .header("content-disposition", attachmentDisposition(parsed.base))
          .sendFile(parsed.base, parsed.dir);
      }
      return res.sendFile(parsed.base, parsed.dir);
    },
  });

  return {
    updateState: websockets.updateState,
    start: (port) => {
      server.listen(
        {
          port,
        },
        (err) => {
          // Failing to bind the port is fatal for the process.
          if (err) {
            console.error(err);
            process.exit(1);
          }
        },
      );
    },
  };
};
//# sourceMappingURL=server.js.map