aiwg
Version:
Cognitive architecture for AI-augmented software development with structured memory, ensemble validation, and closed-loop correction. FAIR-aligned artifacts, 84% cost reduction via human-in-the-loop, standards adopted by 100+ organizations.
557 lines (483 loc) • 16.7 kB
YAML
# FAIR Metadata Framework Schema
# Based on REF-056 FAIR Data Principles
# Issues: #233 (INDEX.md Generation), #235 (YAML Frontmatter)
# Schema root: JSON Schema dialect, schema identity, and the three required
# top-level sections. The two object sections resolve through $ref.
$schema: 'https://json-schema.org/draft/2020-12/schema'
$id: 'https://aiwg.io/schemas/fair-metadata/v1'
title: 'FAIR Metadata Framework Schema'
description: |
  Comprehensive metadata framework implementing FAIR principles for
  automated INDEX.md generation and YAML frontmatter standards per REF-056.
type: object
required: [version, frontmatter_schema, index_generation]
properties:
  version:
    # Three dot-separated numeric components, e.g. 1.0.0.
    type: string
    pattern: '^\d+\.\d+\.\d+$'
    default: '1.0.0'
  frontmatter_schema:
    $ref: '#/$defs/FrontmatterSchema'
  index_generation:
    $ref: '#/$defs/IndexGeneration'
# Reusable definitions: FrontmatterSchema and IndexGeneration.
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file - confirm it matches the canonical source.
$defs:
  # YAML frontmatter carried by each REF-XXX document.
  FrontmatterSchema:
    type: object
    description: "YAML frontmatter schema for REF-XXX documents"
    properties:
      # FAIR principle IDs mapped to how this framework addresses each one.
      fair_principles:
        type: object
        properties:
          F1:
            type: string
            default: "Globally unique and persistent identifiers (REF-XXX)"
          F4:
            type: string
            default: "Metadata registered in searchable resource (INDEX.md)"
          I1:
            type: string
            default: "Formal, accessible language (YAML frontmatter)"
          R1:
            type: string
            default: "Rich metadata with usage guidance"
      required_fields:
        type: array
        items: { type: string }
        default:
          - ref
          - title
          - authors
          - year
          - category
          - summary
      optional_fields:
        type: array
        items: { type: string }
        default:
          - tags
          - quality
          - relevance
          - citations
          - applies_to
          - doi
          - url
          - source_type
          - evidence_level
      # One entry per field named in required_fields / optional_fields.
      # Each nested `default` carries that field's declared constraint.
      field_definitions:
        type: object
        properties:
          ref:
            type: object
            properties:
              type: { type: string, default: "string" }
              pattern: { type: string, default: "^REF-\\d{3}$" }
              description: { type: string, default: "Unique reference identifier" }
              example: { type: string, default: "REF-056" }
          title:
            type: object
            properties:
              type: { type: string, default: "string" }
              max_length: { type: integer, default: 200 }
              description: { type: string, default: "Full paper title" }
              example: { type: string, default: "FAIR Data Principles for Research Artifacts" }
          authors:
            type: object
            properties:
              type: { type: string, default: "array" }
              items: { type: string, default: "string" }
              description: { type: string, default: "Author list" }
              example: { type: array, default: ["Wilkinson et al."] }
          year:
            type: object
            properties:
              type: { type: string, default: "integer" }
              min: { type: integer, default: 1900 }
              # NOTE(review): fixed upper bound; will need raising over time.
              max: { type: integer, default: 2030 }
              description: { type: string, default: "Publication year" }
              example: { type: integer, default: 2016 }
          tags:
            type: object
            properties:
              type: { type: string, default: "array" }
              items: { type: string, default: "string" }
              description: { type: string, default: "Searchable keywords" }
              example:
                type: array
                default: ["research-management", "metadata", "interoperability"]
          category:
            type: object
            properties:
              type: { type: string, default: "string" }
              enum:
                type: array
                default:
                  - agent-frameworks
                  - voice-profiles
                  - rag-retrieval
                  - quality-assurance
                  - sdlc-methodology
                  - research-management
                  - provenance
                  - reproducibility
              description: { type: string, default: "Primary category" }
          summary:
            type: object
            properties:
              type: { type: string, default: "string" }
              max_length: { type: integer, default: 500 }
              description: { type: string, default: "Brief summary of paper" }
          quality:
            type: object
            properties:
              type: { type: string, default: "string" }
              enum:
                type: array
                default: ["high", "moderate", "low", "very-low"]
              description: { type: string, default: "GRADE quality level" }
          relevance:
            type: object
            properties:
              type: { type: string, default: "string" }
              enum:
                type: array
                default: ["foundational", "high", "moderate", "supplementary"]
              description: { type: string, default: "Relevance to AIWG" }
          citations:
            type: object
            properties:
              type: { type: string, default: "array" }
              items: { type: string, default: "string" }
              # NOTE(review): presumably applies to each list item, not the
              # list itself - confirm against the validator.
              pattern: { type: string, default: "^REF-\\d{3}$" }
              description: { type: string, default: "Papers this references" }
          applies_to:
            type: object
            properties:
              type: { type: string, default: "array" }
              items: { type: string, default: "string" }
              description: { type: string, default: "AIWG components this applies to" }
          # doi and url are listed in optional_fields and appear in the
          # template, so they are defined here like every other field.
          doi:
            type: object
            properties:
              type: { type: string, default: "string" }
              description: { type: string, default: "Digital Object Identifier of the paper" }
              example: { type: string, default: "10.1038/sdata.2016.18" }
          url:
            type: object
            properties:
              type: { type: string, default: "string" }
              description: { type: string, default: "Web address where the paper can be accessed" }
          source_type:
            type: object
            properties:
              type: { type: string, default: "string" }
              enum:
                type: array
                default:
                  - peer-reviewed-journal
                  - conference-proceedings
                  - preprint
                  - industry-blog
                  - thesis
                  - book
                  - technical-report
              description: { type: string, default: "Publication type for GRADE" }
          evidence_level:
            type: object
            properties:
              type: { type: string, default: "string" }
              enum:
                type: array
                default:
                  - empirical-study
                  - systematic-review
                  - meta-analysis
                  - case-study
                  - opinion-piece
                  - tutorial
              description: { type: string, default: "Evidence type for GRADE" }
      # Blank frontmatter skeleton with placeholder values.
      template:
        type: string
        default: |
          ---
          ref: REF-XXX
          title: "Paper Title"
          authors: [Author1, Author2]
          year: 2024
          tags: [tag1, tag2]
          category: category-name
          summary: Brief summary of the paper's key findings
          quality: moderate
          relevance: high
          source_type: peer-reviewed-journal
          evidence_level: empirical-study
          citations:
          - REF-001
          - REF-002
          applies_to:
          - component1
          - component2
          doi: "10.xxxx/xxxxx"
          url: "https://..."
          ---
      # Filled-in frontmatter for REF-056.
      example:
        type: string
        default: |
          ---
          ref: REF-056
          title: FAIR Data Principles for Research Artifacts
          authors: [Wilkinson et al.]
          year: 2016
          tags: [research-management, metadata, interoperability]
          category: research-management
          summary: Principles for making research outputs Findable, Accessible, Interoperable, and Reusable
          quality: high
          relevance: foundational
          source_type: peer-reviewed-journal
          evidence_level: empirical-study
          citations:
          - REF-062
          - REF-060
          applies_to:
          - artifact-management
          - provenance-tracking
          - research-integration
          doi: "10.1038/sdata.2016.18"
          ---
  # Settings and templates for building INDEX.md from frontmatter.
  IndexGeneration:
    type: object
    description: "Automated INDEX.md generation from frontmatter"
    properties:
      enabled:
        type: boolean
        default: true
      fair_compliance:
        type: string
        default: "FAIR F4 - (Meta)data are registered or indexed in a searchable resource"
      generation_config:
        type: object
        properties:
          # Glob patterns; entries starting with "!" are written as
          # exclusions (INDEX.md and README.md).
          source_patterns:
            type: array
            items: { type: string }
            default:
              - "**/*.md"
              - "!**/INDEX.md"
              - "!**/README.md"
          output_file:
            type: string
            default: "INDEX.md"
          grouping:
            type: string
            enum: [category, year, relevance, alphabetical]
            default: "category"
          include_summary:
            type: boolean
            default: true
            description: "Include first 280 chars of summary"
          validate_links:
            type: boolean
            default: true
            description: "Verify all cross-references resolve"
          timestamp:
            type: boolean
            default: true
            description: "Include last updated timestamp"
      # Page-level template; {placeholders} are filled in at generation time.
      index_template:
        type: string
        default: |
          # {directory_name} Index
          > Auto-generated from YAML frontmatter. Do not edit manually.
          > Last updated: {timestamp}
          ## Summary
          - Total documents: {total_count}
          - Categories: {category_count}
          ## By Category
          {category_sections}
          ## Alphabetical
          {alphabetical_list}
          ---
          Generated by `aiwg index generate`
      # One table per category; {rows} is built from row_template.
      category_section_template:
        type: string
        default: |
          ### {category_name}
          | Ref | Title | Year | Quality | Relevance |
          |-----|-------|------|---------|-----------|
          {rows}
      row_template:
        type: string
        default: "| [{ref}]({file_path}) | {title} | {year} | {quality} | {relevance} |"
      # Checks applied to frontmatter, each tagged with a severity.
      validation_rules:
        type: array
        items:
          type: object
          properties:
            rule: { type: string }
            severity: { type: string }
        default:
          - rule: "All required frontmatter fields present"
            severity: "error"
          - rule: "REF-XXX format valid"
            severity: "error"
          - rule: "Category in allowed list"
            severity: "warning"
          - rule: "Cross-references resolve"
            severity: "error"
          - rule: "Year within valid range"
            severity: "warning"
# Frontmatter validation result
# Per-file report: the file, an overall valid flag, and separate lists of
# errors and warnings. Only error entries carry a severity.
frontmatter_validation:
  type: object
  properties:
    file:
      type: string
    valid:
      type: boolean
    errors:
      type: array
      items:
        type: object
        properties:
          field:
            type: string
          message:
            type: string
          severity:
            type: string
    warnings:
      type: array
      items:
        type: object
        properties:
          field:
            type: string
          message:
            type: string
# CLI commands
# Each entry gives the usage string, a one-line description, and the flags
# documented for that command.
cli_commands:
  index_generate:
    command: "aiwg index generate [dir]"
    description: "Generate INDEX.md from frontmatter"
    options:
      - name: "--validate"
        description: "Validate cross-references"
      - name: "--dry-run"
        description: "Show what would be generated"
      # Passed by ci_integration.index_freshness_check, which treats a
      # non-zero exit status as "INDEX.md is out of date".
      - name: "--check"
        description: "Exit non-zero if INDEX.md is out of date"
  frontmatter_validate:
    command: "aiwg research validate-frontmatter [dir]"
    description: "Validate all frontmatter in directory"
    options:
      - name: "--fix"
        description: "Auto-fix where possible"
      - name: "--strict"
        description: "Fail on warnings"
      # Passed by ci_integration.pre_commit_hook.
      # NOTE(review): description inferred from the flag name - confirm
      # against the CLI implementation.
      - name: "--quiet"
        description: "Suppress non-error output"
  frontmatter_add:
    command: "aiwg research add-frontmatter <file>"
    description: "Add frontmatter to existing document"
    options:
      - name: "--interactive"
        description: "Prompt for values"
  frontmatter_report:
    command: "aiwg research frontmatter-report"
    description: "Report on frontmatter coverage"
# Agent protocol
# Ordered step lists, one per frontmatter operation.
agent_protocol:
  generate_index:
    description: 'Generate INDEX.md from frontmatter'
    steps:
      - scan_directory_for_markdown
      # NOTE(review): the three per-file sub-steps were inferred from a
      # flattened copy of this file - confirm where the nested list ends.
      - for_each_file:
          - extract_yaml_frontmatter
          - validate_required_fields
          - parse_metadata
      - group_by_category
      - sort_within_groups
      - validate_cross_references
      - render_index_template
      - write_index_file
      - return_generation_report

  validate_frontmatter:
    description: 'Validate frontmatter in document'
    steps:
      - read_file
      - extract_yaml_frontmatter
      - check_required_fields
      - validate_field_formats
      - check_enum_values
      - validate_cross_references
      - return_validation_result

  add_frontmatter:
    description: 'Add frontmatter to existing document'
    steps:
      - read_existing_content
      - extract_metadata_hints_from_content
      - prompt_for_missing_values
      - validate_frontmatter
      - prepend_frontmatter_to_content
      - write_updated_file
# CI integration
# Bash snippets: one validates staged markdown frontmatter, the other fails
# when INDEX.md is stale.
ci_integration:
  pre_commit_hook:
    description: "Validate frontmatter on commit"
    script: |
      #!/bin/bash
      # Validate frontmatter for staged markdown files.
      # Paths are read NUL-delimited (-z) so names containing spaces or
      # newlines reach the validator intact instead of being word-split.
      while IFS= read -r -d '' FILE; do
        if ! aiwg research validate-frontmatter "$FILE" --quiet; then
          echo "ERROR: Invalid frontmatter in $FILE"
          exit 1
        fi
      done < <(git diff --cached --name-only -z --diff-filter=AM -- '*.md')
  index_freshness_check:
    description: "Ensure INDEX.md is up to date"
    script: |
      #!/bin/bash
      # Check if INDEX.md needs regeneration
      if ! aiwg index generate --dry-run --check; then
        echo "ERROR: INDEX.md is out of date"
        echo "Run: aiwg index generate"
        exit 1
      fi
# Storage
# Glob for generated index files and the path of the frontmatter schema.
storage:
  indices: '**/INDEX.md'
  frontmatter_schema: 'agentic/code/frameworks/sdlc-complete/schemas/flows/frontmatter-schema.yaml'
# Research targets (from REF-056 FAIR)
# One line per FAIR principle stating what this framework provides for it.
research_targets:
  findable_f1: 'Globally unique REF-XXX identifiers'
  findable_f4: 'Searchable INDEX.md from frontmatter'
  interoperable_i1: 'Formal YAML frontmatter schema'
  reusable_r1: 'Rich metadata with categories and tags'
# Example INDEX.md output
# Sample rendering with three category tables and the alphabetical list.
# The text is a literal block; every line inside it is part of the value.
example_index_output: |
  # Research Corpus Index
  > Auto-generated from YAML frontmatter. Do not edit manually.
  > Last updated: 2026-01-25T14:30:00Z
  ## Summary
  - Total documents: 62
  - Categories: 8
  ## By Category
  ### agent-frameworks
  | Ref | Title | Year | Quality | Relevance |
  |-----|-------|------|---------|-----------|
  | [REF-013](corpus/REF-013-metagpt.md) | MetaGPT: Multi-Agent Framework | 2023 | high | foundational |
  | [REF-021](corpus/REF-021-reflexion.md) | Reflexion: Language Agents | 2023 | high | high |
  | [REF-022](corpus/REF-022-autogen.md) | AutoGen: Multi-Agent Conversation | 2023 | high | high |
  ### research-management
  | Ref | Title | Year | Quality | Relevance |
  |-----|-------|------|---------|-----------|
  | [REF-056](corpus/REF-056-fair.md) | FAIR Data Principles | 2016 | high | foundational |
  | [REF-060](corpus/REF-060-grade.md) | GRADE Evidence Quality | 2011 | high | foundational |
  | [REF-061](corpus/REF-061-oais.md) | OAIS Reference Model | 2012 | high | high |
  ### voice-profiles
  | Ref | Title | Year | Quality | Relevance |
  |-----|-------|------|---------|-----------|
  | [REF-043](corpus/REF-043-voice.md) | Voice Consistency in AI Writing | 2024 | moderate | high |
  ## Alphabetical
  - [REF-013](corpus/REF-013-metagpt.md) - MetaGPT: Multi-Agent Framework
  - [REF-021](corpus/REF-021-reflexion.md) - Reflexion: Language Agents
  - [REF-022](corpus/REF-022-autogen.md) - AutoGen: Multi-Agent Conversation
  - [REF-043](corpus/REF-043-voice.md) - Voice Consistency in AI Writing
  - [REF-056](corpus/REF-056-fair.md) - FAIR Data Principles
  - [REF-060](corpus/REF-060-grade.md) - GRADE Evidence Quality
  - [REF-061](corpus/REF-061-oais.md) - OAIS Reference Model
  ---
  Generated by `aiwg index generate`
# References
# Pointers to the research finding, issue numbers, and related paths.
# Every entry is quoted because it begins with "@" or "#".
references:
  research:
    - '@.aiwg/research/findings/REF-056-fair-principles.md'
  implementation:
    - '#233'
    - '#235'
  related:
    - '@.aiwg/research/corpus/'
    - '@agentic/code/frameworks/sdlc-complete/schemas/flows/grade-evidence-quality.yaml'
    - '@agentic/code/frameworks/sdlc-complete/schemas/flows/citation-integrity.yaml'