UNPKG

@blundergoat/goat-flow

Version:

AI coding agent harness and local dashboard for Claude Code, OpenAI Codex, Google Antigravity, and GitHub Copilot - setup audits, guardrails, structured skills, deny hooks, and persistent learning loops.

365 lines 15.5 kB
import { SKILL_NAMES } from "../constants.js";
import { getInstalledSkillRoots, getSkillFiles } from "../manifest/manifest.js";

/** Static target scope for full content-quality checks: truth-bearing prose.
 * Learning-loop buckets (footguns/lessons/patterns) and ADR files are resolved
 * separately at scan time - see LEARNING_LOOP_DIRS and listDecisionMarkdown. */
const STATIC_QUALITY_TARGETS = [
  // Hot-path instruction files
  "CLAUDE.md",
  "AGENTS.md",
  ".github/copilot-instructions.md",
  // Canonical docs
  ".goat-flow/architecture.md",
  ".goat-flow/code-map.md",
  ".goat-flow/glossary.md",
  // Shared meta references (composed into every skill)
  ".goat-flow/skill-docs/README.md",
  ".goat-flow/skill-docs/skill-preamble.md",
  ".goat-flow/skill-docs/skill-conventions.md",
  // Standalone playbooks (loaded on-demand by skills/agents)
  ".goat-flow/skill-docs/playbooks/README.md",
  ".goat-flow/skill-docs/playbooks/browser-use.md",
  ".goat-flow/skill-docs/playbooks/gruff-code-quality.md",
  ".goat-flow/skill-docs/playbooks/page-capture.md",
  ".goat-flow/skill-docs/skill-quality-testing/README.md",
  ".goat-flow/skill-docs/skill-quality-testing/tdd-iteration.md",
  ".goat-flow/skill-docs/skill-quality-testing/adversarial-framing.md",
  ".goat-flow/skill-docs/skill-quality-testing/deployment.md",
  // Public docs
  "docs/cli.md",
  "docs/skills.md",
  "docs/audit-and-quality.md",
  // ADR index. ADR-NNN files are discovered dynamically so new decisions do
  // not fall out of content-quality coverage.
  ".goat-flow/learning-loop/decisions/README.md",
  ".goat-flow/learning-loop/decisions/INDEX.md",
  // Setup templates
  "workflow/setup/01-system-overview.md",
  "workflow/setup/02-instruction-file.md",
  "workflow/setup/03-install-skills.md",
  "workflow/setup/04-architecture-code-map.md",
  "workflow/setup/05-customise-to-project.md",
  "workflow/setup/06-final-verification.md",
  "workflow/setup/agents/claude.md",
  "workflow/setup/agents/codex.md",
  "workflow/setup/agents/antigravity.md",
  "workflow/setup/agents/copilot.md",
  "workflow/setup/reference/ADR-000-template.md",
  "workflow/setup/reference/execution-loop.md",
  "workflow/setup/reference/footguns-readme.md",
  "workflow/setup/reference/lessons-readme.md",
  "workflow/setup/reference/reference-coding-guidelines.md",
  "workflow/setup/reference/reference-polish.md",
  "workflow/setup/reference/plans-readme.md",
  "workflow/setup/reference/scratchpad-readme.md",
];

const DECISIONS_DIR = ".goat-flow/learning-loop/decisions/";

/** Learning-loop buckets. Scanned in restricted mode (no vague-term checks)
 * because the Symptoms/Why/Evidence sections describe past incidents and
 * legitimately use words like "correctly"/"properly". Generic-instruction and
 * non-actionable detectors still apply - those patterns should never appear
 * in actionable Prevention blocks. */
const LEARNING_LOOP_DIRS = [
  ".goat-flow/learning-loop/footguns/",
  ".goat-flow/learning-loop/lessons/",
  ".goat-flow/learning-loop/patterns/",
];

const VAGUE_TERMS = [
  {
    term: "properly",
    /** Build the "properly" suggestion. */
    suggestion: (line) =>
      /format|style/i.test(line)
        ? "Specify the exact format or style guide (e.g. 'Follow Prettier defaults' or 'Use 2-space indentation')."
        : "Be specific about the expected format or standard (e.g. 'Use 2-space indentation' instead of 'Format properly').",
  },
  {
    term: "correctly",
    /** Build the "correctly" suggestion. */
    suggestion: (_line) => "Define what 'correct' means with measurable criteria.",
  },
  {
    term: "appropriately",
    /** Build the "appropriately" suggestion. */
    suggestion: (_line) => "Describe the specific situation and the expected response.",
  },
];

/** Word-boundary matchers for VAGUE_TERMS, compiled once at module load so
 * the per-line scan does not rebuild a RegExp for every term on every line.
 * No `g`/`y` flag, so `exec` carries no `lastIndex` state between lines. */
const VAGUE_TERM_MATCHERS = VAGUE_TERMS.map(({ term, suggestion }) => ({
  pattern: new RegExp(`\\b${term}\\b`, "i"),
  suggestion,
}));

const GENERIC_INSTRUCTIONS = [
  {
    rule: "generic-best-practices",
    pattern: /follow\s+best\s+practices/i,
    severity: "warning",
    /** Build the generic best practices finding message. */
    message: () =>
      "Avoid generic 'follow best practices'. Be specific about which practice applies here.",
  },
  {
    rule: "generic-good-code",
    pattern: /write\s+good\s+code/i,
    severity: "warning",
    /** Build the generic good code finding message. */
    message: () =>
      "Avoid vague 'write good code'. Be specific about the standards the reader must meet.",
  },
  {
    rule: "generic-correct",
    pattern: /do\s+it\s+correctly/i,
    severity: "warning",
    /** Build the generic correct finding message. */
    message: () =>
      "Avoid generic 'do it correctly'. Define what correct means with measurable criteria.",
  },
  {
    rule: "generic-common-sense",
    pattern: /use\s+common\s+sense/i,
    severity: "warning",
    /** Build the generic common sense finding message. */
    message: () =>
      "Avoid 'use common sense'. Document the specific decision criteria the reader should apply.",
  },
  {
    rule: "generic-be-careful",
    pattern: /be\s+careful/i,
    severity: "warning",
    /** Build the generic be careful finding message. */
    message: () => "Instead of 'be careful', specify the exact risk and mitigation.",
  },
];

const NON_ACTIONABLE = [
  {
    // `note` dropped from cclint's term list - too many false positives on
    // goat-flow's own docs: label usage (`Note:`), direct-object verbs
    // (`note them`, `Note what X`) all match cclint's `(?!\s+to\s+)` guard
    // but are legitimate instructions. `remember | keep in mind | don't
    // forget` retain the non-actionable signal without the label clash.
    rule: "non-actionable-remember",
    pattern: /(?:\bremember\b|\bkeep in mind\b|\bdon'?t forget\b)(?!\s+to\s+)/i,
    severity: "info",
    /** Build the non actionable remember finding message. */
    message: (match) =>
      `"${match}" without "to <verb>" has no action. State what the reader must do.`,
  },
  {
    rule: "non-actionable-important",
    pattern: /it'?s\s+important(?!\s+to\s+)/i,
    severity: "info",
    /** Build the non actionable important finding message. */
    message: () =>
      '"it\'s important" without "to <verb>" leaves the expected action unspecified.',
  },
  {
    rule: "non-actionable-should-know",
    pattern: /you\s+should\s+know(?!\s+that\s+)/i,
    severity: "info",
    /** Build the non actionable should know finding message. */
    message: () =>
      '"you should know" without "that <fact>" has no propositional content.',
  },
];

/**
 * Legacy v1.0 six-step Execution Loop drift. Matches only the
 * arrow-sequence declaration, not incidental historical prose mentioning
 * CLASSIFY or LOG. All four reviewed v1.2 consumer projects (ambient-scribe,
 * sus-form-detector, blundergoat-platform, rampart) shipped AGENTS.md with
 * the legacy six-step loop while CLAUDE.md + skill-preamble.md used the v1.2
 * four-step.
 */
const LEGACY_EXECUTION_LOOP = [
  {
    rule: "legacy-execution-loop-classify",
    pattern: /\bREAD\s*(?:→|-+>)\s*CLASSIFY\s*(?:→|-+>)\s*SCOPE\b/i,
    severity: "warning",
    /** Build the legacy loop CLASSIFY finding message. */
    message: () =>
      "Legacy v1.0 Execution Loop detected (READ → CLASSIFY → SCOPE → ACT → VERIFY → LOG). The v1.2 loop is four steps: READ → SCOPE → ACT → VERIFY. Rewrite per workflow/setup/reference/execution-loop.md.",
  },
  {
    rule: "legacy-execution-loop-trailing-log",
    pattern: /\bVERIFY\s*(?:→|-+>)\s*LOG\b/i,
    severity: "warning",
    /** Build the legacy loop trailing-LOG finding message. */
    message: () =>
      "Legacy 'VERIFY → LOG' step detected. The v1.2 Execution Loop ends at VERIFY; session logging is finalised at step-06, not as an inline loop step.",
  },
];

const PROMPT_WRAPPER_RESIDUE = [
  {
    rule: "prompt-wrapper-residue",
    pattern: /<\/?(?:content|invoke)\b[^>]*>/i,
    severity: "warning",
    /** Build the prompt wrapper residue finding message. */
    message: (match) =>
      `Prompt wrapper residue "${match}" found in committed prose. Remove model/invocation wrapper tags from repository content.`,
  },
];

const STALE_SKILL_PLAYBOOKS_PATH = [
  {
    rule: "stale-skill-playbooks-path",
    pattern: /\.goat-flow\/skill-playbooks\/|(?<!skill-docs\/)skill-playbooks\//i,
    severity: "warning",
    /** Build the stale skill-playbooks path finding message. */
    message: () =>
      "Stale skill-playbooks path found. Current installed playbooks live under .goat-flow/skill-docs/playbooks/; workflow templates live under workflow/skills/playbooks/.",
  },
];

const HISTORICAL_REFERENCE_DIRS = [DECISIONS_DIR, ...LEARNING_LOOP_DIRS];

/** Preserve old paths in learning-loop history while rejecting them in active guidance. */
function shouldScanStaleSkillPlaybooksPath(path) {
  return !HISTORICAL_REFERENCE_DIRS.some((dir) => path.startsWith(dir));
}

/** Leading run of three or more backticks or tildes, at any indentation. */
const FENCE_MARKER = /^\s*(`{3,}|~{3,})/;

/**
 * Parse a Markdown code-fence line.
 *
 * @param line - One line of the document, without its line terminator.
 * @returns `{ char, length }` describing the fence marker, or `null` when the
 *   line is not a fence.
 */
function parseFence(line) {
  const match = FENCE_MARKER.exec(line);
  if (!match) return null;
  const marker = match[1];
  const char = marker[0];
  // A backtick fence cannot carry backticks in its trailing text; a line such
  // as ```code``` is an inline code span, not a fence, so it must not flip
  // the code-block state.
  if (char === "`" && line.slice(match[0].length).includes("`")) return null;
  return { char, length: marker.length };
}

/** Detect a Markdown table separator row, e.g. `| --- | :---: | ---: |`.
 * A header row is identified by being immediately followed by such a separator;
 * cells in header rows are column labels, not instructional prose. */
function isTableSeparatorLine(line) {
  return /^\s*\|?\s*:?-{2,}:?\s*(\|\s*:?-{2,}:?\s*)+\|?\s*$/.test(line);
}

/**
 * Apply a PatternRule array to a line, accumulating any matches into findings.
 *
 * Each rule contributes at most one finding per line (first match only).
 *
 * @param rules - Entries with `rule`, `pattern`, `severity`, and `message`.
 * @param line - Line text to test.
 * @param lineNumber - 1-based line number recorded on the finding.
 * @param path - Repo-relative path recorded on the finding.
 * @param findings - Output array, mutated in place.
 */
function applyPatternRules(rules, line, lineNumber, path, findings) {
  for (const rule of rules) {
    const match = rule.pattern.exec(line);
    if (!match) continue;
    findings.push({
      severity: rule.severity,
      rule: rule.rule,
      path,
      line: lineNumber,
      message: rule.message(match[0], line),
    });
  }
}

/** Apply vague-term detection to a line (full mode only). */
function applyVagueTerms(line, lineNumber, path, findings) {
  for (const { pattern, suggestion } of VAGUE_TERM_MATCHERS) {
    const match = pattern.exec(line);
    if (!match) continue;
    findings.push({
      severity: "info",
      rule: "vague-term",
      path,
      line: lineNumber,
      message: `Vague term "${match[0]}" - no measurable standard.`,
      suggestion: suggestion(line),
    });
  }
}

/** Scan one line for vague, generic, non-actionable, or legacy-loop guidance. */
function scanLine(line, lineNumber, path, findings, mode = "full") {
  if (mode === "full") {
    applyVagueTerms(line, lineNumber, path, findings);
  }
  applyPatternRules(GENERIC_INSTRUCTIONS, line, lineNumber, path, findings);
  applyPatternRules(NON_ACTIONABLE, line, lineNumber, path, findings);
  applyPatternRules(PROMPT_WRAPPER_RESIDUE, line, lineNumber, path, findings);
  if (shouldScanStaleSkillPlaybooksPath(path)) {
    applyPatternRules(STALE_SKILL_PLAYBOOKS_PATH, line, lineNumber, path, findings);
  }
  // Setup templates are exempt from the legacy-loop detector.
  // NOTE(review): presumably because workflow/setup/ reference docs quote the
  // legacy sequence while explaining the migration - confirm.
  if (!path.startsWith("workflow/setup/")) {
    applyPatternRules(LEGACY_EXECUTION_LOOP, line, lineNumber, path, findings);
  }
}

/**
 * Scan one file, skipping fenced code blocks before applying prose detectors.
 *
 * Pass `mode: "restricted"` for learning-loop files to skip vague-term checks
 * on incident-description prose while still rejecting generic instructions.
 *
 * Fence tracking remembers the opening marker: a block opened with N
 * backticks (or tildes) is closed only by a fence line of the same character
 * with at least N markers, so a four-backtick block can embed three-backtick
 * examples without its contents being scanned as prose. Deliberately lenient
 * on one point: a same-marker fence that carries an info string also closes
 * the block, which keeps the previous toggle result for documents that nest
 * equal-length fences.
 *
 * @param path - Repo-relative path used in emitted findings and mode-specific rules.
 * @param text - Markdown or instruction-file content to scan.
 * @param mode - Detector set to apply for the target surface.
 * @returns Content-quality findings found outside fenced code blocks.
 */
export function scanContentQuality(path, text, mode = "full") {
  const findings = [];
  const lines = text.split(/\r?\n/);
  // `null` while in prose; the opening fence descriptor while inside a block.
  let openFence = null;
  for (let i = 0; i < lines.length; i++) {
    const line = lines[i] ?? "";
    const fence = parseFence(line);
    if (openFence !== null) {
      const closesBlock =
        fence !== null &&
        fence.char === openFence.char &&
        fence.length >= openFence.length;
      if (closesBlock) openFence = null;
      // Fence lines and block content are never scanned.
      continue;
    }
    if (fence !== null) {
      openFence = fence;
      continue;
    }
    // Table header rows hold column labels, not instructions; a header is
    // recognised by the separator row that immediately follows it.
    if (line.includes("|") && isTableSeparatorLine(lines[i + 1] ?? "")) {
      continue;
    }
    scanLine(line, i + 1, path, findings, mode);
  }
  return findings;
}

/**
 * List current ADR files in a deterministic order.
 *
 * ADR content is a stable truth surface, and discovering `ADR-NNN-*.md` files
 * at runtime keeps new decisions inside content-quality coverage without
 * requiring a second hard-coded target list.
 */
function listDecisionMarkdown(ctx) {
  if (!ctx.fs.exists(DECISIONS_DIR)) return [];
  return ctx.fs
    .listDir(DECISIONS_DIR)
    .filter((name) => /^ADR-\d{3}-.+\.md$/.test(name))
    .sort()
    .map((name) => `${DECISIONS_DIR}${name}`);
}

/**
 * Resolve the full scan target list.
 *
 * The target set is assembled here because static truth surfaces, current ADRs,
 * and every installed skill file are maintained by different setup paths; a
 * single de-duped resolver avoids coverage drift between those sources.
 */
function resolveTargets(ctx) {
  const targets = new Set([
    ...STATIC_QUALITY_TARGETS,
    ...listDecisionMarkdown(ctx),
  ]);
  for (const agentDir of getInstalledSkillRoots()) {
    for (const name of SKILL_NAMES) {
      for (const relativeFile of getSkillFiles(name)) {
        targets.add(`${agentDir}/${name}/${relativeFile}`);
      }
    }
  }
  return [...targets];
}

/** List `<dir>/*.md` entries, excluding README.md, in sorted order. Used to
 * pick up learning-loop buckets without resolving non-markdown files; sorting
 * keeps finding order independent of the filesystem's listing order, matching
 * listDecisionMarkdown. */
function listBucketMarkdown(ctx, dir) {
  if (!ctx.fs.exists(dir)) return [];
  return ctx.fs
    .listDir(dir)
    .filter((name) => name.endsWith(".md") && name !== "README.md")
    .sort()
    .map((name) => `${dir}${name}`);
}

/**
 * Run content-quality checks across the configured documentation targets.
 *
 * Missing files and unreadable targets are skipped; the function reports prose
 * findings for available surfaces instead of failing the whole audit on
 * optional buckets.
 *
 * @param ctx - Audit context containing the read-only project filesystem.
 * @returns Findings plus the count of files that were actually scanned.
 */
export function runContentQualityChecks(ctx) {
  const findings = [];
  let filesScanned = 0;
  // Static targets, ADRs, and installed skill files: full detector set.
  for (const rel of resolveTargets(ctx)) {
    if (!ctx.fs.exists(rel)) continue;
    const text = ctx.fs.readFile(rel);
    if (text === null) continue;
    filesScanned++;
    findings.push(...scanContentQuality(rel, text, "full"));
  }
  // Learning-loop buckets: restricted mode (no vague-term checks).
  for (const dir of LEARNING_LOOP_DIRS) {
    for (const rel of listBucketMarkdown(ctx, dir)) {
      const text = ctx.fs.readFile(rel);
      if (text === null) continue;
      filesScanned++;
      findings.push(...scanContentQuality(rel, text, "restricted"));
    }
  }
  return { findings, filesScanned };
}
//# sourceMappingURL=check-content-quality.js.map