UNPKG

@blundergoat/goat-flow

Version:

AI coding agent harness and local dashboard for Claude Code, OpenAI Codex, Google Antigravity, and GitHub Copilot - setup audits, guardrails, structured skills, deny hooks, and persistent learning loops.

494 lines 20 kB
import { SKILL_NAMES } from "../constants.js";
import { SETUP_CHECKS } from "./check-goat-flow.js";
import { AGENT_CHECKS } from "./check-agent-setup.js";
import { HARNESS_CHECKS } from "./harness/index.js";
import { CONTEXT_CHECKS } from "./harness/check-context.js";
import { CONSTRAINTS_CHECKS } from "./harness/check-constraints.js";
import { VERIFICATION_CHECKS } from "./harness/check-verification.js";
import { RECOVERY_CHECKS } from "./harness/check-recovery.js";
import { FEEDBACK_LOOP_CHECKS } from "./harness/check-feedback-loop.js";
import { loadManifest } from "../manifest/manifest.js";
import { scanSemanticDrift } from "./check-factual-semantic-drift.js";

/** Fixed prose files scanned by every factual-claim check (when present). */
const PROSE_TARGETS = [
  "README.md",
  "CONTRIBUTING.md",
  ".goat-flow/architecture.md",
  ".goat-flow/code-map.md",
];

/** Glob for the additional doc files scanned alongside PROSE_TARGETS. */
const DOC_GLOB = "docs/*.md";

/** Glossary file; only the removed-command scan runs against it. */
const GLOSSARY_TARGET = ".goat-flow/glossary.md";

/** Files where a loose `N views`/`N presets` pattern is safe because the file
 *  is dashboard-specific. Outside these files, the pattern would false-positive
 *  on generic prose, so we keep it scoped. */
const DASHBOARD_SCOPED_TARGETS = ["docs/dashboard.md"];

/** Looser view/preset patterns that only run against DASHBOARD_SCOPED_TARGETS. */
const DASHBOARD_SCOPED_CHECKS = [
  {
    rule: "dashboard-views-count-drift",
    pattern: /\b(\d+)\s+views?\b/gi,
    /** Return the live views count. */
    actual: () => loadManifest().facts.dashboard_views.count,
    label: "views",
  },
  {
    rule: "preset-count-drift",
    pattern: /\b(\d+)\s+presets?\b/gi,
    /** Return the live presets count. */
    actual: () => loadManifest().facts.presets.count,
    label: "presets",
  },
];

/** Live concern → check-count map, built from the harness check arrays that
 *  are the single source of truth. Keyed by a normalised concern label so doc
 *  phrasings like "Feedback Loop", "Feedback-Loop", and "feedback_loop" all
 *  resolve to the same count. */
const CONCERN_SIZES = {
  context: CONTEXT_CHECKS.length,
  constraints: CONSTRAINTS_CHECKS.length,
  verification: VERIFICATION_CHECKS.length,
  recovery: RECOVERY_CHECKS.length,
  feedback_loop: FEEDBACK_LOOP_CHECKS.length,
};

/** Separator-less spelling ("feedbackloop") → CONCERN_SIZES key. Needed because
 *  CONCERN_ALTERNATION also accepts "FeedbackLoop" with no separator at all. */
const CONCERN_KEY_BY_COMPACT_LABEL = new Map(
  Object.keys(CONCERN_SIZES).map((key) => [key.replace(/_/g, ""), key]),
);

/**
 * Normalise a doc-style concern label ("Feedback Loop", "Feedback-Loop",
 * "FeedbackLoop", "feedback_loop") into the CONCERN_SIZES key form.
 *
 * Labels that do not correspond to a known concern fall back to the plain
 * lower-case / underscore-joined form.
 *
 * @param raw Concern label as captured from doc prose.
 * @returns The matching CONCERN_SIZES key, or the underscore-joined label.
 */
function normaliseConcern(raw) {
  const lowered = raw.toLowerCase();
  const compact = lowered.replace(/[\s_-]+/g, "");
  return CONCERN_KEY_BY_COMPACT_LABEL.get(compact) ?? lowered.replace(/[\s-]+/g, "_");
}

/**
 * Return the live check count for one concern label.
 *
 * @param raw Concern label as captured from doc prose.
 * @returns The check count, or `undefined` when the label is not a known concern.
 */
function concernActualFor(raw) {
  return CONCERN_SIZES[normaliseConcern(raw)];
}

/** Alternation fragment shared by concern patterns. Kept in sync with the five
 *  concern arrays imported above. */
const CONCERN_ALTERNATION = "Context|Constraints|Verification|Recovery|Feedback[\\s-]?Loop";

/** Per-concern drift patterns. All three patterns catch drift in current or
 *  updated doc prose; the sample-output one scans fenced blocks because
 *  the `audit-and-quality.md` sample lives inside a code fence. */
const CONCERN_CHECKS = [
  {
    // Matches `**Context** (4)` bullet-list style (audit-and-quality.md:66)
    rule: "concern-count-drift-bullet",
    pattern: new RegExp(`\\*\\*(${CONCERN_ALTERNATION})\\*\\*\\s*\\((\\d+)\\)`, "g"),
    actualFor: concernActualFor,
    label: "concern bullet count",
  },
  {
    // Matches `**Context checks (4):**` style from harness-audit.md prose.
    rule: "concern-count-drift-checks-label",
    pattern: new RegExp(`\\*\\*(${CONCERN_ALTERNATION})\\s+checks?\\s*\\((\\d+)\\)`, "gi"),
    actualFor: concernActualFor,
    label: "concern checks count",
  },
  {
    // Matches `Context: PASS (3/3)` sample-output style inside fenced blocks
    rule: "concern-sample-output-drift",
    pattern: new RegExp(`\\b(${CONCERN_ALTERNATION}):\\s+(?:PASS|FAIL)\\s+\\(\\d+\\/(\\d+)\\)`, "g"),
    actualFor: concernActualFor,
    label: "concern sample-output total",
    scanFenced: true,
  },
];

/** Numeric claim checks applied to every scanned doc. Each pattern's first
 *  capture group is the claimed number; `actual` returns the live value.
 *  `scopedTo`, when present, restricts a check to paths with those prefixes. */
const COUNT_CHECKS = [
  {
    rule: "skill-count-drift",
    pattern: /\b(\d+)\s+skills?\b/gi,
    /** Return the live skills count. */
    actual: () => SKILL_NAMES.length,
    label: "skills",
    scopedTo: [".goat-flow/", "ai-docs/"],
  },
  {
    rule: "agent-check-count-drift",
    pattern: /\b(\d+)\s+checks?\s+per\s+(?:configured\s+)?agent\b/gi,
    /** Return the live checks per configured agent count. */
    actual: () => AGENT_CHECKS.length,
    label: "checks per configured agent",
  },
  {
    rule: "harness-check-count-drift",
    pattern: /\b(\d+)\s+checks\s+across\s+\d+\s+concerns\b/gi,
    /** Return the live harness checks across 5 concerns count. */
    actual: () => HARNESS_CHECKS.length,
    label: "harness checks across 5 concerns",
  },
  {
    rule: "ai-harness-count-drift",
    pattern: /\b(\d+)\s+AI\s+[Hh]arness\b/g,
    /** Return the live AI harness installation checks count. */
    actual: () => HARNESS_CHECKS.length,
    label: "AI harness installation checks",
  },
  {
    rule: "harness-structural-count-drift",
    pattern: /\b(\d+)\s+structural\s+installation\s+checks?\b/gi,
    /** Return the live structural installation checks count. */
    actual: () => HARNESS_CHECKS.length,
    label: "structural installation checks",
  },
  {
    rule: "harness-scope-flag-count-drift",
    pattern: /AI\s+Harness\s+Completeness\s+scope\s*\((\d+)\s+checks?\b/gi,
    /** Return the live AI Harness Completeness scope count. */
    actual: () => HARNESS_CHECKS.length,
    label: "AI Harness Completeness scope",
  },
  {
    rule: "harness-checks-by-type-drift",
    pattern: /\bThe\s+(\d+)\s+checks?\s+by\s+type\b/gi,
    /** Return the live checks by type count. */
    actual: () => HARNESS_CHECKS.length,
    label: "checks by type",
  },
  {
    rule: "setup-check-count-drift",
    pattern: /\b(\d+)\s+checks\s+on\s+goat-flow-owned\s+surfaces\b/gi,
    /** Return the live setup checks on goat-flow-owned surfaces count. */
    actual: () => SETUP_CHECKS.length,
    label: "setup checks on goat-flow-owned surfaces",
  },
  {
    rule: "dashboard-views-count-drift",
    pattern: /\b(\d+)\s+dashboard\s+views?\b/gi,
    /** Return the live dashboard views count. */
    actual: () => loadManifest().facts.dashboard_views.count,
    label: "dashboard views",
  },
  {
    rule: "preset-count-drift",
    pattern: /\b(\d+)\s+workspace\s+presets?\b/gi,
    /** Return the live workspace presets count. */
    actual: () => loadManifest().facts.presets.count,
    label: "workspace presets",
  },
];

/** Check whether a line starts or ends a fenced code block. */
function isFenceLine(line) {
  return /^\s*```/.test(line);
}

/** CLI commands that were deliberately removed. Docs must not teach them.
 *  Unlike count/path checks, this scanner runs on fenced lines too, because
 *  the most common failure is copy-pasted command examples inside fences. */
const REMOVED_COMMANDS = [
  {
    rule: "removed-command-quality-capture",
    // Match the fully-qualified form (`goat-flow quality capture`) and the
    // backticked shorthand (`` `quality capture` ``) that docs/glossaries use.
    pattern: /\bgoat-flow\s+quality\s+capture\b|`quality\s+capture`/g,
    message:
      "`goat-flow quality capture` was removed in v1.2.0; agents now write reports directly to `.goat-flow/logs/quality/`.",
  },
];

/**
 * Scan one doc file for references to removed CLI commands.
 *
 * Runs across every line including fenced code blocks because fenced command
 * examples are the primary leak path this check exists to catch. At most one
 * finding is emitted per (line, command) pair.
 *
 * @param path Repo-relative source path used in findings.
 * @param text Markdown content to scan.
 * @param removed Removed command patterns to flag.
 * @returns Content findings for removed command references.
 */
function scanRemovedCommands(path, text, removed = REMOVED_COMMANDS) {
  const findings = [];
  const lines = text.split(/\r?\n/);
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i] ?? "";
    for (const cmd of removed) {
      // Fresh regex per use: the shared pattern carries the `g` flag, so
      // reusing it would leak `lastIndex` state between lines.
      const rx = new RegExp(cmd.pattern.source, cmd.pattern.flags);
      if (rx.test(line)) {
        findings.push({
          severity: "warning",
          rule: cmd.rule,
          path,
          line: i + 1,
          message: cmd.message,
        });
      }
    }
  }
  return findings;
}

/**
 * Scan one doc file for numeric-count drift using the provided check set.
 *
 * By default, fenced code blocks are skipped because prose code samples should
 * not be drift-matched. Individual checks can opt in via `scanFenced: true` to
 * catch structural drift in sample-output blocks.
 *
 * @param path Repo-relative source path used in findings.
 * @param text Markdown content to scan.
 * @param checks Numeric claim checks to apply.
 * @returns Content findings for count claims that disagree with live code.
 */
function scanCountClaims(path, text, checks = COUNT_CHECKS) {
  const findings = [];
  const applicable = checks.filter(
    (c) => !c.scopedTo || c.scopedTo.some((p) => path.startsWith(p)),
  );

  // Resolve each check's live value at most once per scan, and only when a
  // claim actually matches, so manifest-backed checks do not reload per match.
  const actualByCheck = new Map();
  const liveCountFor = (check) => {
    if (!actualByCheck.has(check)) actualByCheck.set(check, check.actual());
    return actualByCheck.get(check);
  };

  const lines = text.split(/\r?\n/);
  let inCodeBlock = false;
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i] ?? "";
    if (isFenceLine(line)) {
      inCodeBlock = !inCodeBlock;
      continue;
    }
    for (const check of applicable) {
      if (inCodeBlock && !check.scanFenced) continue;
      // Fresh regex per line so `lastIndex` always starts at 0.
      const rx = new RegExp(check.pattern.source, check.pattern.flags);
      let match;
      while ((match = rx.exec(line)) !== null) {
        const claimed = Number(match[1]);
        const actual = liveCountFor(check);
        if (claimed !== actual) {
          findings.push({
            severity: "warning",
            rule: check.rule,
            path,
            line: i + 1,
            message: `Doc claims ${claimed} ${check.label}, code says ${actual}.`,
            suggestion: `Update "${match[0]}" to match the actual count (${actual}).`,
          });
        }
      }
    }
  }
  return findings;
}

/**
 * Apply one concern-count check to one line; returns any drift findings.
 * Extracted from `scanConcernCountClaims` to keep the outer loop under the
 * eslint complexity cap.
 *
 * @param line Text of the line being scanned.
 * @param lineNum 1-based line number used in findings.
 * @param path Repo-relative source path used in findings.
 * @param check Concern check whose pattern captures (1) label and (2) count.
 * @returns Content findings for this line and check.
 */
function matchConcernCheckOnLine(line, lineNum, path, check) {
  const findings = [];
  const rx = new RegExp(check.pattern.source, check.pattern.flags);
  let match;
  while ((match = rx.exec(line)) !== null) {
    const concernRaw = match[1];
    const claimedStr = match[2];
    if (concernRaw === undefined || claimedStr === undefined) continue;
    const actual = check.actualFor(concernRaw);
    // Unknown concern label: nothing authoritative to compare against.
    if (actual === undefined) continue;
    const claimed = Number(claimedStr);
    if (claimed === actual) continue;
    findings.push({
      severity: "warning",
      rule: check.rule,
      path,
      line: lineNum,
      message: `${check.label}: doc says ${concernRaw} has ${claimed}, code says ${actual}.`,
      suggestion: `Update "${match[0]}" to match the ${concernRaw} concern's actual count (${actual}).`,
    });
  }
  return findings;
}

/**
 * Scan one doc file for per-concern count drift.
 *
 * Each check's pattern must have two capture groups: (1) concern label,
 * (2) claimed number. The authoritative count is looked up via `actualFor`.
 * Fenced code blocks are skipped unless the check sets `scanFenced: true`.
 *
 * @param path Repo-relative source path used in findings.
 * @param text Markdown content to scan.
 * @param checks Concern-count checks to apply.
 * @returns Content findings for concern counts that disagree with live code.
 */
function scanConcernCountClaims(path, text, checks = CONCERN_CHECKS) {
  const findings = [];
  const lines = text.split(/\r?\n/);
  let inCodeBlock = false;
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i] ?? "";
    if (isFenceLine(line)) {
      inCodeBlock = !inCodeBlock;
      continue;
    }
    for (const check of checks) {
      if (inCodeBlock && !check.scanFenced) continue;
      findings.push(...matchConcernCheckOnLine(line, i + 1, path, check));
    }
  }
  return findings;
}

/**
 * Extract backtick-wrapped repo-relative paths and flag ones that do not exist.
 *
 * Fenced code blocks are skipped. Trailing punctuation is trimmed from a
 * candidate before the existence check.
 *
 * @param path Repo-relative source path used in findings.
 * @param text Markdown content to scan.
 * @param ctx Audit context used for target filesystem existence checks.
 * @returns Informational findings for unresolved repo-local path references.
 */
function scanPathReferences(path, text, ctx) {
  const findings = [];
  const lines = text.split(/\r?\n/);
  let inCodeBlock = false;
  // Backtick-wrapped paths that look repo-local.
  const rx = /`([^`\s]+)`/g;
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i] ?? "";
    if (isFenceLine(line)) {
      inCodeBlock = !inCodeBlock;
      continue;
    }
    if (inCodeBlock) continue;
    let match;
    // `rx` is shared across lines and global, so rewind it for each line.
    rx.lastIndex = 0;
    while ((match = rx.exec(line)) !== null) {
      const candidate = match[1] ?? "";
      if (!looksLikeRepoPath(candidate)) continue;
      const cleaned = candidate.replace(/[)\].,;:]+$/, ""); // trim trailing punctuation
      if (INTENTIONAL_LOCAL_STATE_PATHS.has(cleaned)) continue;
      if (ctx.fs.exists(cleaned)) continue;
      findings.push({
        severity: "info",
        rule: "path-ref-unresolved",
        path,
        line: i + 1,
        message: `Referenced path \`${cleaned}\` does not exist in the project.`,
      });
    }
  }
  return findings;
}

/** Paths that docs may reference even though `scanPathReferences` would not
 *  find them on disk; they are exempt from the unresolved-path finding. */
const INTENTIONAL_LOCAL_STATE_PATHS = new Set([".goat-flow/project-id"]);

/** Lifetime/retention/limit phrases that should name the enforcing constant.
 *  When a doc claims "retained for 90 days" without anchoring the value to a
 *  code path, future edits to the constant drift past the doc silently
 *  (awslabs/cli-agent-orchestrator PR #245 P1-B: docs/memory.md claimed
 *  scope-keyed retention while cleanup_service.py keyed on memory_type). */
const LIFETIME_PHRASE_RE =
  /\b(?:retained for|expires after|expires in|TTL(?:\s+of)?|ceiling of|max(?:imum)? of|limit of)\s+(\d+)\s+(days?|hours?|minutes?|seconds?|chars?|characters?|entries|items|sessions?|lines?)/gi;

/** Evidence anchors that satisfy the lifetime-claim check: a backtick repo
 *  path, a (search: ...) anchor, or a (file: ...) anchor on the same line. */
const EVIDENCE_ANCHOR_RE =
  /`(?:src|workflow|scripts|\.goat-flow|\.github|test|docs|\.claude|\.codex|\.agents)\/[^`]+`|\(search:\s*["'][^"']+["']\)|\(file:\s*[^)]+\)/u;

/**
 * Scan one doc file for lifetime/retention claims lacking an enforcing-code anchor.
 *
 * Any line that claims a lifetime, expiry, TTL, ceiling, or limit MUST also
 * reference the code path that enforces the value. Without an anchor, future
 * edits to the constant drift past the doc and the divergence ships silently.
 * Fenced code blocks are excluded because sample output legitimately discusses
 * values without anchoring them.
 *
 * @param path Repo-relative source path used in findings.
 * @param text Markdown content to scan.
 * @returns Informational findings for lifetime claims without evidence anchors.
 */
function scanLifetimeClaimEvidence(path, text) {
  const findings = [];
  const lines = text.split(/\r?\n/);
  let inCodeBlock = false;
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i] ?? "";
    if (isFenceLine(line)) {
      inCodeBlock = !inCodeBlock;
      continue;
    }
    if (inCodeBlock) continue;
    const rx = new RegExp(LIFETIME_PHRASE_RE.source, LIFETIME_PHRASE_RE.flags);
    let match = rx.exec(line);
    // An anchor anywhere on the line covers every claim on it, so test once
    // per line, and only when the line contains a claim at all.
    if (match === null || EVIDENCE_ANCHOR_RE.test(line)) continue;
    for (; match !== null; match = rx.exec(line)) {
      findings.push({
        severity: "info",
        rule: "lifetime-claim-evidence-missing",
        path,
        line: i + 1,
        message: `Lifetime claim "${match[0]}" has no enforcing-code anchor on this line.`,
        suggestion:
          'Add a backtick repo path (e.g. `src/cli/server/terminal.ts`) or `(search: "CONSTANT_NAME")` on the same line so future edits cannot silently drift.',
      });
    }
  }
  return findings;
}

/** Leading directory prefixes that mark a backticked token as a repo-local path. */
const REPO_PATH_PREFIXES = [
  "src/",
  "workflow/",
  ".goat-flow/",
  "scripts/",
  "docs/",
  "test/",
  ".claude/",
  ".codex/",
  ".agents/",
  ".github/",
];

/** Check whether a token looks like a repo-local file path. */
function looksLikeRepoPath(candidate) {
  if (candidate.length < 3) return false;
  if (candidate.startsWith("http")) return false;
  // Glob patterns are not literal paths - skip them.
  if (candidate.includes("*") || candidate.includes("?")) return false;
  // Template placeholders are not literal on-disk paths.
  if (candidate.includes("{") || candidate.includes("}")) return false;
  if (candidate.includes("<") || candidate.includes(">")) return false;
  return REPO_PATH_PREFIXES.some((p) => candidate.startsWith(p));
}

/** Collect the files that factual-claim checks should scan: the PROSE_TARGETS
 *  that exist, followed by everything matching DOC_GLOB. */
function collectTargets(ctx) {
  const targets = [];
  for (const rel of PROSE_TARGETS) {
    if (ctx.fs.exists(rel)) targets.push(rel);
  }
  for (const rel of ctx.fs.glob(DOC_GLOB)) {
    targets.push(rel);
  }
  return targets;
}

/**
 * Run factual-claim checks across the configured documentation targets.
 *
 * Missing or unreadable target docs recover by skipping that file; unresolved
 * claims are emitted as content findings so audit can report all drift at once.
 * The glossary and the dashboard-scoped pass add findings but do not add to
 * the scanned-file count.
 *
 * @param ctx Audit context with target filesystem access.
 * @returns Factual-claim findings and number of scanned files.
 */
export function runFactualClaimChecks(ctx) {
  const findings = [];
  let filesScanned = 0;
  for (const rel of collectTargets(ctx)) {
    const text = ctx.fs.readFile(rel);
    if (text === null) continue;
    filesScanned++;
    findings.push(...scanCountClaims(rel, text));
    findings.push(...scanConcernCountClaims(rel, text));
    findings.push(...scanPathReferences(rel, text, ctx));
    findings.push(...scanRemovedCommands(rel, text));
    findings.push(...scanLifetimeClaimEvidence(rel, text));
  }

  // The glossary only gets the removed-command scan.
  const glossary = ctx.fs.readFile(GLOSSARY_TARGET);
  if (glossary !== null) {
    findings.push(...scanRemovedCommands(GLOSSARY_TARGET, glossary));
  }

  // Dashboard-specific loose patterns (safe only on dashboard docs).
  for (const rel of DASHBOARD_SCOPED_TARGETS) {
    if (!ctx.fs.exists(rel)) continue;
    const text = ctx.fs.readFile(rel);
    if (text === null) continue;
    findings.push(...scanCountClaims(rel, text, DASHBOARD_SCOPED_CHECKS));
  }

  const semantic = scanSemanticDrift(ctx);
  return {
    findings: [...findings, ...semantic.findings],
    filesScanned: filesScanned + semantic.filesScanned,
  };
}
//# sourceMappingURL=check-factual-claims.js.map