bff-eval
Version:
Evaluation framework for LLM systems - Node.js CLI
161 lines (160 loc) • 6.44 kB
JavaScript
;
/**
 * TypeScript interop helper: exposes property `key` of `source` on `target`
 * under the name `alias` (defaulting to `key`).
 *
 * When `Object.create` is available the property is defined through a
 * descriptor. The source's own descriptor is reused only when it is an
 * accessor on a module flagged `__esModule`, or a data property that is
 * neither writable nor configurable; in every other case an enumerable getter
 * that reads `source[key]` on each access is installed instead. Without
 * `Object.create` the current value is simply copied.
 */
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        var exportName = alias === undefined ? key : alias;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var reusable = false;
        if (descriptor) {
            reusable = "get" in descriptor
                ? Boolean(source.__esModule)
                : !(descriptor.writable || descriptor.configurable);
        }
        if (!reusable) {
            // Fall back to a getter so later changes on the source stay visible.
            descriptor = {
                enumerable: true,
                get: function () {
                    return source[key];
                }
            };
        }
        Object.defineProperty(target, exportName, descriptor);
    }
    : function (target, source, key, alias) {
        var exportName = alias === undefined ? key : alias;
        target[exportName] = source[key];
    });
/**
 * TypeScript interop helper: stores `value` as the `default` member of
 * `target`. With `Object.create` available the member is defined as an
 * enumerable data property via `Object.defineProperty`; otherwise it is
 * assigned directly.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        var descriptor = { enumerable: true, value: value };
        Object.defineProperty(target, "default", descriptor);
    }
    : function (target, value) {
        target["default"] = value;
    });
/**
 * TypeScript interop helper backing `import * as ns from "..."`.
 *
 * A module already flagged `__esModule` is returned untouched. Anything else
 * is wrapped in a fresh object: every own key except "default" is re-exported
 * through `__createBinding`, and the original module itself is attached as
 * `default` through `__setModuleDefault`.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Picks the key-listing strategy on first use and then replaces itself
    // with it, so the feature check runs only once.
    var listOwnKeys = function (subject) {
        listOwnKeys = Object.getOwnPropertyNames || function (candidate) {
            var names = [];
            for (var name in candidate) {
                if (Object.prototype.hasOwnProperty.call(candidate, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(subject);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var idx = 0; idx < keys.length; idx++) {
                var key = keys[idx];
                if (key !== "default") {
                    __createBinding(namespace, mod, key);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
/**
 * TypeScript interop helper backing `import x from "..."`: a module flagged
 * `__esModule` is returned as-is, anything else is wrapped so that it becomes
 * the `default` member of a new object.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.cli = void 0;
const yargs_1 = __importDefault(require("yargs"));
const helpers_1 = require("yargs/helpers");
const path = __importStar(require("node:path"));
const fs = __importStar(require("node:fs"));
exports.cli = (0, yargs_1.default)((0, helpers_1.hideBin)(process.argv))
.scriptName("bff-eval")
.usage("$0 [options]")
.version("0.1.1")
.help()
.strict()
.option("input", {
alias: "i",
type: "string",
description: "Input JSONL file containing test cases",
})
.option("grader", {
alias: "g",
type: "string",
description: "Grader file to use for evaluation",
})
.option("demo", {
alias: "d",
type: "string",
description: "Run a demo from the examples folder (omit value for random demo)",
})
.option("model", {
alias: "m",
type: "string",
description: "LLM model to use",
default: "gpt-4",
})
.option("output", {
alias: "o",
type: "string",
description: "Output file for results",
})
.option("verbose", {
alias: "v",
type: "boolean",
description: "Enable verbose logging",
default: false,
})
.epilogue("Run LLM evaluation with graders")
.example("$0 --input test.jsonl --grader grader.js", "Run evaluation with test cases from test.jsonl using grader.js")
.example("$0 -i data.jsonl -g eval.js -m gpt-3.5-turbo -o results.json", "Run with custom model and output file")
.example("$0 --demo json-validator", "Run the json-validator demo")
.example("$0 --demo", "Run a random demo from the examples folder")
.check((argv) => {
// Check for mutually exclusive options
if (argv.demo !== undefined && (argv.input || argv.grader)) {
throw new Error("--demo flag is mutually exclusive with --input and --grader flags");
}
// Check that either demo or both input/grader are provided
if (argv.demo === undefined && (!argv.input || !argv.grader)) {
throw new Error("Please provide either --demo flag or both --input and --grader arguments");
}
// If using demo, resolve the demo paths
if (argv.demo !== undefined) {
const examplesDir = path.join(__dirname, "..", "..", "examples");
let demoName = argv.demo;
// If demo value is empty string (from --demo with no value), pick a random demo
if (demoName === "" || typeof demoName === "boolean") {
try {
const demos = fs.readdirSync(examplesDir, { withFileTypes: true })
.filter(dirent => dirent.isDirectory())
.map(dirent => dirent.name);
if (demos.length === 0) {
throw new Error("No demo folders found in examples directory");
}
demoName = demos[Math.floor(Math.random() * demos.length)];
console.log(`Randomly selected demo: ${demoName}`);
}
catch (error) {
throw new Error(`Failed to read examples directory: ${error instanceof Error ? error.message : String(error)}`);
}
}
const demoDir = path.join(examplesDir, demoName);
const graderPath = path.join(demoDir, "grader.js");
const samplesPath = path.join(demoDir, "samples.jsonl");
// Check if demo exists
if (!fs.existsSync(demoDir)) {
// List available demos
try {
const demos = fs.readdirSync(examplesDir, { withFileTypes: true })
.filter(dirent => dirent.isDirectory())
.map(dirent => dirent.name);
throw new Error(`Demo '${demoName}' not found. Available demos: ${demos.join(", ") || "(none)"}`);
}
catch (error) {
if (error instanceof Error && error.message.includes("Demo")) {
throw error;
}
throw new Error(`Demo '${demoName}' not found and couldn't list available demos`);
}
}
// Check if required files exist
if (!fs.existsSync(graderPath)) {
throw new Error(`Demo '${demoName}' is missing grader.js file`);
}
if (!fs.existsSync(samplesPath)) {
throw new Error(`Demo '${demoName}' is missing samples.jsonl file`);
}
// Set the input and grader paths
argv.input = samplesPath;
argv.grader = graderPath;
console.log(`Running demo: ${demoName}`);
}
else {
// Resolve paths relative to current working directory
if (argv.input) {
argv.input = path.resolve(process.cwd(), argv.input);
}
if (argv.grader) {
argv.grader = path.resolve(process.cwd(), argv.grader);
}
if (argv.output) {
argv.output = path.resolve(process.cwd(), argv.output);
}
}
return true;
});