aiwg

Version:

Deployment tool and support utility for AI context. Copies agents, skills, commands, rules, and behaviors into the paths each AI platform reads (Claude Code, Codex, Copilot, Cursor, Warp, OpenClaw, and 6 more) so one source of truth works across 10 platfo

aiwg.io

jmagly/aiwg

77 lines (65 loc) • 2.76 kB

text/typescript

View Raw

#!/usr/bin/env tsx /** * AIWG Model Evaluation Suite — CLI entry point * * Usage: * npx tsx src/index.ts <model-id> [options] * npx tsx src/index.ts hermes3:latest --backend ollama * npx tsx src/index.ts hermes3:latest --dimensions tool-use,coding * * @issue #433 */ import { Command } from 'commander'; import path from 'path'; import { promises as fs } from 'fs'; import { fileURLToPath } from 'url'; import { OllamaModel } from './models/ollama.js'; import { EvalRunner } from './runner.js'; import { generateMarkdownReport } from './reporters/markdown.js'; import { generateJsonReport } from './reporters/json.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); const DATASETS_DIR = path.resolve(__dirname, '../datasets'); const program = new Command(); program .name('aiwg-eval') .description('AIWG model evaluation suite') .version('0.1.0') .argument('<model-id>', 'Model identifier (e.g., hermes3:latest)') .option('--backend <type>', 'Backend: ollama or api', 'ollama') .option('--dimensions <list>', 'Comma-separated dimensions to evaluate') .option('--output <format>', 'Output format: json or markdown', 'markdown') .option('--ollama-url <url>', 'Ollama API URL', 'http://localhost:11434') .option('--verbose', 'Show detailed progress', false) .action(async (modelId: string, options: Record<string, string | boolean>) => { console.log(`\nAIWG Model Evaluation Suite`); console.log(`Model: ${modelId}`); console.log(`Backend: ${options.backend}`); console.log(`Ollama URL: ${options['ollama-url'] || 'http://localhost:11434'}`); console.log(''); // Create model client const model = new OllamaModel(modelId, options['ollama-url'] as string); // Parse dimensions const dimensions = options.dimensions ? (options.dimensions as string).split(',').map((d: string) => d.trim()) : undefined; // Run evaluation const runner = new EvalRunner(model, DATASETS_DIR); const report = await runner.run({ dimensions, verbose: options.verbose as boolean, }); // Generate report const output = options.output === 'json' ? generateJsonReport(report) : generateMarkdownReport(report); console.log(output); // Save report const reportsDir = path.resolve(__dirname, '../reports'); await fs.mkdir(reportsDir, { recursive: true }); const timestamp = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19); const ext = options.output === 'json' ? 'json' : 'md'; const reportPath = path.join(reportsDir, `eval-${modelId.replace(/[:/]/g, '-')}-${timestamp}.${ext}`); await fs.writeFile(reportPath, output); console.log(`\nReport saved: ${reportPath}`); }); program.parse();