aiwg
Version:
Deployment tool and support utility for AI context. Copies agents, skills, commands, rules, and behaviors into the paths each AI platform reads (Claude Code, Codex, Copilot, Cursor, Warp, OpenClaw, and 6 more) so one source of truth works across 10 platforms.
43 lines (42 loc) • 953 B
JSON
{
"id": "aiwg-evals",
"type": "addon",
"name": "AIWG Evaluations",
"version": "1.0.0",
"description": "Automated evaluation framework for agent quality, based on KAMI benchmark methodology and ReAct patterns",
"core": false,
"autoInstall": false,
"author": "AIWG Contributors",
"license": "MIT",
"repository": "https://github.com/jmagly/aiwg",
"keywords": [
"aiwg",
"evals",
"testing",
"quality",
"benchmark",
"kami",
"react"
],
"researchFoundation": {
"REF-001": "BP-9 - Continuous evaluation of agent performance",
"REF-002": "KAMI benchmark - Real agentic task evaluation"
},
"entry": {
"tests": "tests/",
"scenarios": "scenarios/",
"reports": "reports/",
"skills": "skills/"
},
"commands": [
"eval-agent",
"eval-workflow",
"eval-report"
],
"scenarios": [
"grounding-test",
"distractor-test",
"recovery-test",
"parallel-test"
]
}