aiwg
Version:
Cognitive architecture for AI-augmented software development with structured memory, ensemble validation, and closed-loop correction. Offers FAIR-aligned artifacts, an 84% cost reduction via human-in-the-loop, and standards adopted by 100+ organizations.
43 lines (42 loc) • 957 B
JSON
{
  "id": "aiwg-evals",
  "type": "addon",
  "name": "AIWG Evaluations",
  "version": "1.0.0",
  "description": "Automated evaluation framework for agent quality, based on KAMI benchmark methodology and ReAct patterns",
  "core": false,
  "autoInstall": false,
  "author": "AIWG Contributors",
  "license": "MIT",
  "repository": "https://github.com/jmagly/aiwg",
  "keywords": [
    "aiwg",
    "evals",
    "testing",
    "quality",
    "benchmark",
    "kami",
    "react"
  ],
  "researchFoundation": {
    "REF-001": "BP-9 - Continuous evaluation of agent performance",
    "REF-002": "KAMI benchmark - Real agentic task evaluation"
  },
  "entry": {
    "tests": "tests/",
    "scenarios": "scenarios/",
    "reports": "reports/",
    "commands": "commands/"
  },
  "commands": [
    "eval-agent",
    "eval-workflow",
    "eval-report"
  ],
  "scenarios": [
    "grounding-test",
    "distractor-test",
    "recovery-test",
    "parallel-test"
  ]
}