UNPKG

evalite

Version:

Test your LLM-powered apps with a TypeScript-native, Vitest-based eval runner. No API key required.

mattpocock/evalite

177 lines • 5.58 kB

JavaScript

import { runEvalite } from "./run-evalite.js";
import { buildApplication, buildCommand, buildRouteMap } from "@stricli/core";
import {
  buildInstallCommand,
  buildUninstallCommand,
} from "@stricli/auto-complete";
import { createRequire } from "node:module";
import { exportStaticUI } from "./export-static.js";
import { createSqliteStorage } from "./storage/sqlite.js";
import path from "node:path";
import { DB_LOCATION } from "./backend-only-constants.js";

// package.json is loaded through createRequire so the CLI can report its own
// name and version.
const packageJson = createRequire(import.meta.url)("../package.json");

/**
 * Parses the `--threshold` flag.
 *
 * Rejects non-numeric input instead of letting `NaN` flow through as the
 * score threshold.
 *
 * @param {string} input - Raw flag value from the command line.
 * @returns {number} The parsed threshold.
 * @throws {Error} If `input` does not start with a number.
 */
const parseThreshold = (input) => {
  const value = Number.parseFloat(input);
  if (Number.isNaN(value)) {
    throw new Error(
      `Invalid threshold "${input}": expected a number between 0 and 100.`,
    );
  }
  return value;
};

/**
 * Parses the `--runId` flag as a base-10 integer.
 *
 * @param {string} input - Raw flag value from the command line.
 * @returns {number} The parsed run ID.
 * @throws {Error} If `input` does not start with a decimal integer.
 */
const parseRunId = (input) => {
  // Explicit radix: run IDs are decimal, never hex ("0x...") literals.
  const value = Number.parseInt(input, 10);
  if (Number.isNaN(value)) {
    throw new Error(`Invalid run ID "${input}": expected an integer.`);
  }
  return value;
};

// Positional argument and flags shared by the `run`, `serve` and `watch`
// commands.
const commonParameters = {
  positional: {
    kind: "tuple",
    parameters: [{ parse: String, brief: "path", optional: true }],
  },
  flags: {
    threshold: {
      kind: "parsed",
      parse: parseThreshold,
      brief:
        "Fails the process if the score is below threshold. Specified as 0-100. Default is 100.",
      optional: true,
    },
    outputPath: {
      kind: "parsed",
      parse: String,
      brief:
        "Path to write test results in JSON format after evaluation completes.",
      optional: true,
    },
    hideTable: {
      kind: "boolean",
      brief: "Hides the detailed table output in the CLI.",
      optional: true,
    },
  },
};

/**
 * Builds the CLI application around the supplied command implementations.
 *
 * The implementations are injected, so a caller can supply its own in place
 * of the ones used by the exported `program`.
 *
 * @param {object} commands - Command implementations.
 * @param {Function} commands.runOnceAtPath - Handler for the `run` command.
 * @param {Function} commands.serve - Handler for the `serve` command.
 * @param {Function} commands.watch - Handler for the `watch` command.
 * @param {Function} commands.export - Handler for the `export` command.
 * @returns The application built by `buildApplication`.
 */
export const createProgram = (commands) => {
  // Combines parsed flags and the optional positional path into the single
  // options object every run-style handler receives.
  const toRunOptions = (flags, targetPath) => ({
    path: targetPath,
    threshold: flags.threshold,
    outputPath: flags.outputPath,
    hideTable: flags.hideTable,
  });

  const runOnce = buildCommand({
    parameters: commonParameters,
    func: async (flags, targetPath) => {
      return commands.runOnceAtPath(toRunOptions(flags, targetPath));
    },
    docs: {
      brief: "Run evals once and exit",
    },
  });

  const serve = buildCommand({
    parameters: commonParameters,
    func: (flags, targetPath) => {
      return commands.serve(toRunOptions(flags, targetPath));
    },
    docs: {
      brief: "Run evals once and serve UI",
    },
  });

  const watch = buildCommand({
    parameters: commonParameters,
    func: (flags, targetPath) => {
      // `watch` shares the common flag set but refuses --outputPath.
      if (flags.outputPath) {
        throw new Error(
          "--outputPath is not supported in watch mode. Use 'evalite --outputPath <path>' instead.",
        );
      }
      return commands.watch(toRunOptions(flags, targetPath));
    },
    docs: {
      brief: "Watch evals for file changes",
    },
  });

  const exportCmd = buildCommand({
    parameters: {
      flags: {
        output: {
          kind: "parsed",
          parse: String,
          brief:
            "Output directory for static export (default: ./evalite-export)",
          optional: true,
        },
        runId: {
          kind: "parsed",
          parse: parseRunId,
          brief: "Specific run ID to export (default: latest)",
          optional: true,
        },
      },
    },
    func: (flags) => {
      return commands.export({ output: flags.output, runId: flags.runId });
    },
    docs: {
      brief: "Export static UI bundle for CI artifacts",
    },
  });

  const routes = buildRouteMap({
    routes: {
      run: runOnce,
      serve,
      watch,
      export: exportCmd,
      install: buildInstallCommand("evalite", {
        bash: "__evalite_bash_complete",
      }),
      uninstall: buildUninstallCommand("evalite", { bash: true }),
    },
    defaultCommand: "run",
    docs: {
      brief: "",
      // The shell-completion install/uninstall routes are marked hidden.
      hideRoute: {
        install: true,
        uninstall: true,
      },
    },
  });

  return buildApplication(routes, {
    name: packageJson.name,
    versionInfo: {
      currentVersion: packageJson.version,
    },
  });
};

/**
 * Creates a handler that calls `runEvalite` in the given mode.
 *
 * Every mode forwards the same option set, including `hideTable`, so the
 * flags in `commonParameters` behave the same for `run`, `serve` and `watch`.
 *
 * @param {string} mode - Mode string passed to `runEvalite`.
 * @returns {Function} Handler taking `{ path, threshold, outputPath, hideTable }`.
 */
const createRunner = (mode) => (opts) => {
  return runEvalite({
    path: opts.path,
    scoreThreshold: opts.threshold,
    cwd: undefined,
    mode,
    outputPath: opts.outputPath,
    hideTable: opts.hideTable,
  });
};

/**
 * The CLI application wired to the real `runEvalite` and static-export
 * implementations.
 */
export const program = createProgram({
  watch: createRunner("watch-for-file-changes"),
  runOnceAtPath: createRunner("run-once-and-exit"),
  serve: createRunner("run-once-and-serve"),
  export: async (opts) => {
    const cwd = process.cwd();
    const dbPath = path.join(cwd, DB_LOCATION);
    const storage = await createSqliteStorage(dbPath);
    try {
      await exportStaticUI({
        storage,
        outputPath: opts.output ?? "./evalite-export",
        runId: opts.runId,
      });
    } finally {
      // Close the storage handle even when the export throws.
      await storage.close();
    }
  },
});
//# sourceMappingURL=command.js.map