aiwg
Version:
Deployment tool and support utility for AI context. Copies agents, skills, commands, rules, and behaviors into the paths each AI platform reads (Claude Code, Codex, Copilot, Cursor, Warp, OpenClaw, and 6 more) so one source of truth works across 10 platforms.
77 lines (65 loc) • 2.76 kB
text/typescript
/**
* AIWG Model Evaluation Suite — CLI entry point
*
* Usage:
* npx tsx src/index.ts <model-id> [options]
* npx tsx src/index.ts hermes3:latest --backend ollama
* npx tsx src/index.ts hermes3:latest --dimensions tool-use,coding
*
* @issue #433
*/
import { Command } from 'commander';
import path from 'path';
import { promises as fs } from 'fs';
import { fileURLToPath } from 'url';
import { OllamaModel } from './models/ollama.js';
import { EvalRunner } from './runner.js';
import { generateMarkdownReport } from './reporters/markdown.js';
import { generateJsonReport } from './reporters/json.js';
// ES modules have no built-in __dirname, so rebuild it from this module's URL.
const moduleFilePath = fileURLToPath(import.meta.url);
const __dirname = path.dirname(moduleFilePath);

// The datasets folder is a sibling of the directory that contains this file.
const DATASETS_DIR = path.resolve(__dirname, '..', 'datasets');
/**
 * Shape of the options object commander hands to the action handler.
 *
 * Commander camel-cases multi-word flags, so `--ollama-url` is exposed as
 * `ollamaUrl` (not `'ollama-url'`).
 */
interface EvalCliOptions {
  backend: string;
  dimensions?: string;
  output: string;
  ollamaUrl: string;
  verbose: boolean;
}

/**
 * CLI definition: evaluates one model against the datasets in DATASETS_DIR,
 * prints the report to stdout, and saves a copy under ../reports next to
 * this module's directory.
 */
const program = new Command();
program
  .name('aiwg-eval')
  .description('AIWG model evaluation suite')
  .version('0.1.0')
  .argument('<model-id>', 'Model identifier (e.g., hermes3:latest)')
  .option('--backend <type>', 'Backend: ollama or api', 'ollama')
  .option('--dimensions <list>', 'Comma-separated dimensions to evaluate')
  .option('--output <format>', 'Output format: json or markdown', 'markdown')
  .option('--ollama-url <url>', 'Ollama API URL', 'http://localhost:11434')
  .option('--verbose', 'Show detailed progress', false)
  .action(async (modelId: string, options: EvalCliOptions): Promise<void> => {
    console.log(`\nAIWG Model Evaluation Suite`);
    console.log(`Model: ${modelId}`);
    console.log(`Backend: ${options.backend}`);
    console.log(`Ollama URL: ${options.ollamaUrl}`);
    console.log('');

    // Only an Ollama client is constructed below, so make it visible when a
    // different backend was requested instead of ignoring the flag silently.
    if (options.backend !== 'ollama') {
      console.warn(
        `Warning: backend "${options.backend}" is not handled by this entry point; using ollama.`,
      );
    }

    // Create model client
    const model = new OllamaModel(modelId, options.ollamaUrl);

    // Parse dimensions: trim each entry and drop empties left by stray or
    // trailing commas. No usable entry means "no filter" (undefined).
    const requested = (options.dimensions ?? '')
      .split(',')
      .map((d) => d.trim())
      .filter((d) => d.length > 0);
    const dimensions = requested.length > 0 ? requested : undefined;

    // Run evaluation
    const runner = new EvalRunner(model, DATASETS_DIR);
    const report = await runner.run({
      dimensions,
      verbose: options.verbose,
    });

    // Generate report; anything other than "json" renders as markdown.
    const isJson = options.output === 'json';
    const output = isJson ? generateJsonReport(report) : generateMarkdownReport(report);
    console.log(output);

    // Save report
    const reportsDir = path.resolve(__dirname, '../reports');
    await fs.mkdir(reportsDir, { recursive: true });
    // ISO timestamp made filename-safe and cut to second precision,
    // e.g. 2024-01-02T03-04-05.
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);
    const ext = isJson ? 'json' : 'md';
    // Model ids may contain ':' or '/', which are replaced for the filename.
    const safeModelId = modelId.replace(/[:/]/g, '-');
    const reportPath = path.join(reportsDir, `eval-${safeModelId}-${timestamp}.${ext}`);
    await fs.writeFile(reportPath, output);
    console.log(`\nReport saved: ${reportPath}`);
  });

// The action handler is async, so parseAsync() is required for its promise
// to be awaited; failures are reported here rather than as an unhandled
// rejection.
program.parseAsync().catch((err: unknown) => {
  const message = err instanceof Error ? err.message : String(err);
  console.error(`Evaluation failed: ${message}`);
  process.exitCode = 1;
});