@blundergoat/goat-flow
Version:
AI coding agent harness and local dashboard for Claude Code, OpenAI Codex, Google Antigravity, and GitHub Copilot - setup audits, guardrails, structured skills, deny hooks, and persistent learning loops.
455 lines • 20.5 kB
JavaScript
/**
* Semantic-drift scanners for high-trust cold-path docs (code-map, glossary, ADRs). Where the
* factual-claims checks compare exact strings, these read live source - classifier state unions,
* server constants, the manifest - and flag the curated docs that quietly fall out of sync with it.
* Runs only under `--check-content` because reading source on every audit would be too expensive.
*/
import { AUDIT_VERSION } from "../constants.js";
import { loadManifest } from "../manifest/manifest.js";
/**
 * Collect the string-literal members of the `ProjectStateName` union from the classifier source.
 *
 * @param ctx - audit context; `ctx.fs.readFile` is asked for `src/cli/classify-state.ts`
 * @returns the double-quoted literals found in the union body, in source order; an empty array
 *   when the read yields null or no `type ProjectStateName = ...;` declaration is found
 */
function readProjectStates(ctx) {
  const text = ctx.fs.readFile("src/cli/classify-state.ts");
  if (text === null) {
    return [];
  }
  // Lazy match up to the first semicolon so only the union body itself is captured.
  const unionBody = text.match(/type ProjectStateName =([\s\S]*?);/)?.[1];
  if (unionBody === undefined) {
    return [];
  }
  const names = [];
  for (const literal of unionBody.matchAll(/"([^"]+)"/g)) {
    if (literal[1] !== undefined) {
      names.push(literal[1]);
    }
  }
  return names;
}
/**
 * Read the numeric value assigned to `MAX_SESSIONS` in the terminal server source.
 *
 * @param ctx - audit context; `ctx.fs.readFile` is asked for `src/cli/server/terminal.ts`
 * @returns the first `MAX_SESSIONS = <digits>` value as a number, or null when the read yields
 *   null or no such assignment is present
 */
function readMaxSessions(ctx) {
  const text = ctx.fs.readFile("src/cli/server/terminal.ts");
  if (text === null) {
    return null;
  }
  const digits = text.match(/MAX_SESSIONS\s*=\s*(\d+)/)?.[1];
  return digits === undefined ? null : Number(digits);
}
/**
 * Read the numeric value assigned to `DEFAULT_IDLE_TIMEOUT_MINUTES` in the terminal server source.
 *
 * @param ctx - audit context; `ctx.fs.readFile` is asked for `src/cli/server/terminal.ts`
 * @returns the first `DEFAULT_IDLE_TIMEOUT_MINUTES = <digits>` value as a number, or null when
 *   the read yields null or no such assignment is present
 */
function readDefaultIdleTimeout(ctx) {
  const text = ctx.fs.readFile("src/cli/server/terminal.ts");
  if (text === null) {
    return null;
  }
  const found = text.match(/DEFAULT_IDLE_TIMEOUT_MINUTES\s*=\s*(\d+)/);
  if (!found) {
    return null;
  }
  return Number(found[1]);
}
/**
 * Build the runner names as the docs spell them, one per manifest agent, in manifest order.
 *
 * Known agent ids map to a fixed short label; any other agent falls back to its manifest
 * `name` with a trailing " Code" or " CLI" suffix removed.
 *
 * @returns the doc-facing display name for every entry in `loadManifest().agents`
 */
function docAgentNames() {
  const shortLabels = {
    claude: "Claude",
    codex: "Codex",
    antigravity: "Antigravity",
    copilot: "Copilot",
  };
  const names = [];
  for (const [agentId, agent] of Object.entries(loadManifest().agents)) {
    names.push(shortLabels[agentId] ?? agent.name.replace(/\s+(Code|CLI)$/u, ""));
  }
  return names;
}
/**
 * Drift: the classify-state values listed in code-map.md don't match the source union.
 *
 * The documented values are taken from the first parenthesised group on the first code-map
 * line mentioning `classify-state.ts`, split on "/". Each value is trimmed and empty pieces
 * are dropped, so spacing around the separators ("a / b") is not reported as drift.
 * The comparison is order-sensitive.
 *
 * @param codeMap - full text of .goat-flow/code-map.md
 * @param ctx - audit context used to read the live classifier source
 * @returns one warning when both sides are non-empty and differ; otherwise an empty array
 *   (including when either side could not be extracted)
 */
function driftCodeMapClassifyState(codeMap, ctx) {
  const states = readProjectStates(ctx);
  const line = codeMap
    .split(/\r?\n/)
    .find((entry) => entry.includes("classify-state.ts"));
  const docStates = (line?.match(/\(([^)]+)\)/)?.[1] ?? "")
    .split("/")
    .map((state) => state.trim())
    .filter(Boolean);
  // Nothing to compare when either side is missing; absence is not treated as drift here.
  if (states.length === 0 || docStates.length === 0) {
    return [];
  }
  if (docStates.join("|") === states.join("|")) {
    return [];
  }
  return [
    {
      severity: "warning",
      rule: "code-map-state-drift",
      path: ".goat-flow/code-map.md",
      message: `Code map lists classify-state values as ${docStates.join("/")} but source exports ${states.join("/")}.`,
      suggestion: "Update the classify-state.ts summary in .goat-flow/code-map.md to match the live ProjectStateName union.",
    },
  ];
}
/**
 * Parse the dashboard view names listed on the code-map "views/" line.
 *
 * Only the first line containing both "views/" and "HTML view" is considered; names come from
 * its first parenthesised group, split on commas, trimmed, with any ".html" suffix removed.
 *
 * @param codeMap - full text of the code map
 * @returns the non-empty names sorted with the default comparator, or null when no such line
 *   exists or that line has no parenthesised group
 */
function readCodeMapDashboardViews(codeMap) {
  for (const entry of codeMap.split(/\r?\n/)) {
    if (!entry.includes("views/") || !entry.includes("HTML view")) {
      continue;
    }
    const listed = entry.match(/\(([^)]+)\)/)?.[1];
    if (listed === undefined) {
      return null;
    }
    const names = [];
    for (const piece of listed.split(",")) {
      const name = piece.trim().replace(/\.html$/u, "");
      if (name) {
        names.push(name);
      }
    }
    names.sort();
    return names;
  }
  return null;
}
/**
 * List the live dashboard view names (file basenames without ".html"), sorted.
 *
 * When the glob finds no files (e.g. a filesystem stub), the manifest's
 * `facts.dashboard_views.names` is used instead. Both branches return a fresh array sorted
 * with the default comparator, so callers that compare against sorted doc claims do not
 * depend on the manifest's own ordering.
 *
 * @param ctx - audit context; `ctx.fs.glob` is asked for `src/dashboard/views/*.html`
 * @returns sorted view names
 */
function readDashboardViewFiles(ctx) {
  const files = ctx.fs.glob("src/dashboard/views/*.html");
  if (files.length === 0) {
    return [...loadManifest().facts.dashboard_views.names].sort();
  }
  return files
    // Accept either path separator so a backslash-delimited glob result still yields a basename.
    .map((file) => file.split(/[\\/]/u).at(-1)?.replace(/\.html$/u, "") ?? "")
    .filter(Boolean)
    .sort();
}
/**
 * Drift: the dashboard views enumerated in code-map.md differ from the live view list.
 *
 * A code map with no parseable views line also produces the warning, with the documented
 * side rendered as "none".
 *
 * @param codeMap - full text of .goat-flow/code-map.md
 * @param ctx - audit context used to list the live view files
 * @returns one warning when the lists differ or the code map has no views line; else empty
 */
function driftCodeMapDashboardViews(codeMap, ctx) {
  const documented = readCodeMapDashboardViews(codeMap);
  const live = readDashboardViewFiles(ctx);
  const inSync = documented !== null && documented.join("|") === live.join("|");
  if (inSync) {
    return [];
  }
  const documentedLabel = documented === null ? "none" : documented.join(", ");
  const finding = {
    severity: "warning",
    rule: "code-map-dashboard-view-drift",
    path: ".goat-flow/code-map.md",
    message: `Code map lists dashboard views as ${documentedLabel}, but src/dashboard/views has ${live.join(", ")}.`,
    suggestion: "Update the src/dashboard/views/ summary in .goat-flow/code-map.md to match the live .html view files.",
  };
  return [finding];
}
/**
 * List the top-level skill playbook files, sorted.
 *
 * README.md is left out because it is the index rather than a playbook.
 *
 * @param ctx - audit context; `ctx.fs.listDir` is asked for `.goat-flow/skill-docs/playbooks`
 * @returns the entries ending in ".md" other than README.md, sorted with the default comparator
 */
function readTopLevelSkillPlaybooks(ctx) {
  const playbooks = [];
  for (const entry of ctx.fs.listDir(".goat-flow/skill-docs/playbooks")) {
    if (entry !== "README.md" && entry.endsWith(".md")) {
      playbooks.push(entry);
    }
  }
  playbooks.sort();
  return playbooks;
}
/**
 * Drift: a committed doc's playbook inventory leaves out live top-level playbooks.
 *
 * A playbook counts as mentioned when its file name appears anywhere in the doc text.
 *
 * @param path - repo-relative path of the doc being checked; echoed in the finding
 * @param text - full text of that doc
 * @param ctx - audit context used to list the live playbooks
 * @returns one warning naming every unmentioned playbook, or an empty array when there are
 *   no live playbooks or all of them are mentioned
 */
function driftSkillPlaybookInventory(path, text, ctx) {
  const live = readTopLevelSkillPlaybooks(ctx);
  const unmentioned = live.filter((name) => !text.includes(name));
  // An empty live list yields an empty unmentioned list, so one guard covers both cases.
  if (unmentioned.length === 0) {
    return [];
  }
  const finding = {
    severity: "warning",
    rule: "skill-playbook-inventory-drift",
    path,
    message: `${path} omits top-level skill playbook(s): ${unmentioned.join(", ")}. Live playbooks are ${live.join(", ")}.`,
    suggestion: "Update the committed skill-docs playbook inventory to include every top-level .goat-flow/skill-docs/playbooks/*.md playbook except README.md.",
  };
  return [finding];
}
/**
 * Drift: session-cap claims in docs/dashboard.md disagree with the live MAX_SESSIONS.
 *
 * Two phrasings are recognised: the rail wording (`up to N`) and the hard-cap wording
 * (`Maximum N concurrent sessions`). Each distinct wrong (phrasing, number) pair is reported
 * once, so a doc that contradicts itself surfaces every disagreeing claim.
 *
 * @param dashboard - full text of docs/dashboard.md
 * @param ctx - audit context used to read the live session cap
 * @returns one warning per distinct disagreeing claim; empty when the cap cannot be read
 */
function driftDashboardSessions(dashboard, ctx) {
  const liveCap = readMaxSessions(ctx);
  if (liveCap === null) {
    return [];
  }
  const claimPatterns = [
    ["rail is up to", /up to (\d+)/g],
    ["Maximum", /Maximum (\d+) concurrent sessions?/g],
  ];
  const reported = new Set();
  const findings = [];
  for (const [label, regex] of claimPatterns) {
    for (const hit of dashboard.matchAll(regex)) {
      const claimed = Number(hit[1]);
      const dedupeKey = `${label}:${claimed}`;
      if (claimed === liveCap || reported.has(dedupeKey)) {
        continue;
      }
      reported.add(dedupeKey);
      findings.push({
        severity: "warning",
        rule: "dashboard-sessions-drift",
        path: "docs/dashboard.md",
        message: `Dashboard docs say ${label} ${claimed}, but terminal.ts uses ${liveCap}.`,
        suggestion: `Update docs/dashboard.md to the live session cap (${liveCap}).`,
      });
    }
  }
  return findings;
}
/**
 * Drift contract: the `###` headings under `## Views` in docs/dashboard.md must match the
 * manifest's dashboard view names.
 *
 * Headings are normalised before comparison: backticks removed, trimmed, lower-cased, and
 * whitespace runs replaced with "-". Both sides are sorted first, so neither the order of
 * headings in the doc nor the order of names in the manifest can cause a finding.
 *
 * @param dashboard - full text of docs/dashboard.md
 * @returns one warning when the name sets differ; an empty array when they match or the doc
 *   has no `## Views` section
 */
function driftDashboardViewNames(dashboard) {
  const lines = dashboard.split(/\r?\n/);
  const start = lines.findIndex((line) => /^## Views\s*$/u.test(line));
  if (start === -1) {
    return [];
  }
  const claimed = [];
  for (const line of lines.slice(start + 1)) {
    // The next level-2 heading ends the Views section.
    if (/^##\s+/u.test(line)) {
      break;
    }
    const heading = line.match(/^###\s+(.+?)\s*$/u);
    if (heading?.[1] === undefined) {
      continue;
    }
    claimed.push(heading[1].replace(/`/g, "").trim().toLowerCase().replace(/\s+/g, "-"));
  }
  const claimedSorted = [...claimed].sort();
  // Sort a copy of the manifest names too: comparing a sorted list against manifest order
  // would flag drift whenever the manifest is not already alphabetical.
  const actualSorted = [...loadManifest().facts.dashboard_views.names].sort();
  if (claimedSorted.join("|") === actualSorted.join("|")) {
    return [];
  }
  return [
    {
      severity: "warning",
      rule: "dashboard-view-name-drift",
      path: "docs/dashboard.md",
      message: `Dashboard docs list view headings as ${claimedSorted.join(", ")}, but manifest-backed views are ${actualSorted.join(", ")}.`,
      suggestion: "Update docs/dashboard.md view headings to match workflow/manifest.json dashboard_views.",
    },
  ];
}
/**
 * Drift: idle-timeout claims in docs/dashboard.md disagree with the terminal default.
 *
 * Recognises "<N> minute idle timeout" and "<N> hour idle timeout" (case-insensitive, with an
 * optional hyphen or whitespace before the unit). Hour claims are converted to minutes before
 * comparison. Each distinct disagreeing phrase is reported once.
 *
 * @param dashboard - full text of docs/dashboard.md
 * @param ctx - audit context used to read the live default timeout
 * @returns one warning per distinct disagreeing phrase; empty when the default cannot be read
 */
function driftDashboardIdleTimeout(dashboard, ctx) {
  const liveMinutes = readDefaultIdleTimeout(ctx);
  if (liveMinutes === null) {
    return [];
  }
  const unitPatterns = [
    [/(\d+)[-\s]?minute idle timeout/gi, 1],
    [/(\d+)[-\s]?hour idle timeout/gi, 60],
  ];
  const reportedPhrases = new Set();
  const findings = [];
  for (const [regex, minutesPerUnit] of unitPatterns) {
    for (const hit of dashboard.matchAll(regex)) {
      const [phrase, amount] = hit;
      if (amount === undefined) {
        continue;
      }
      const claimedMinutes = Number(amount) * minutesPerUnit;
      if (claimedMinutes === liveMinutes || reportedPhrases.has(phrase)) {
        continue;
      }
      reportedPhrases.add(phrase);
      findings.push({
        severity: "warning",
        rule: "dashboard-idle-timeout-drift",
        path: "docs/dashboard.md",
        message: `Dashboard docs say "${phrase}" (${claimedMinutes} minutes), but terminal.ts defaults to ${liveMinutes} minutes.`,
        suggestion: `Update docs/dashboard.md to the live idle timeout (${liveMinutes} minutes).`,
      });
    }
  }
  return findings;
}
/**
 * Drift: the runner list in docs/dashboard.md differs from the manifest-backed runner names.
 *
 * The list is taken from the first "- Supports <list> runners" match and split on commas and
 * the word "and"; a leading "and " left over from an Oxford comma is stripped. The comparison
 * is order-sensitive.
 *
 * @param dashboard - full text of docs/dashboard.md
 * @returns one warning when the lists differ; an empty array when they match or the doc has
 *   no such line
 */
function driftDashboardRunners(dashboard) {
  const listed = dashboard.match(/- Supports (.+?) runners/)?.[1];
  if (listed === undefined) {
    return [];
  }
  const actual = docAgentNames();
  const claimed = [];
  for (const part of listed.split(/,\s*|\s+and\s+/u)) {
    const name = part.trim().replace(/^and\s+/u, "");
    if (name) {
      claimed.push(name);
    }
  }
  if (claimed.join("|") === actual.join("|")) {
    return [];
  }
  const finding = {
    severity: "warning",
    rule: "dashboard-runner-drift",
    path: "docs/dashboard.md",
    message: `Dashboard docs list runners as ${claimed.join(", ")}, but manifest-backed runners are ${actual.join(", ")}.`,
    suggestion: "Update docs/dashboard.md to match the current manifest-backed runner list.",
  };
  return [finding];
}
/**
 * Drift: the runner line in docs/dashboard.md names a release tag other than AUDIT_VERSION.
 *
 * Only the first "- Supports ... runners" line is inspected, and only an "in vX.Y.Z" tag on
 * that same line counts.
 *
 * @param dashboard - full text of docs/dashboard.md
 * @returns one warning when the tagged version differs from AUDIT_VERSION; an empty array
 *   when there is no runner line, no tag on it, or the tag matches
 */
function driftDashboardVersionReference(dashboard) {
  const runnerLine = dashboard.match(/- Supports .+? runners[^\n]*/u)?.[0];
  const taggedVersion = runnerLine?.match(/\bin v(\d+\.\d+\.\d+)\b/u)?.[1];
  const isStale = taggedVersion !== undefined && taggedVersion !== AUDIT_VERSION;
  if (!isStale) {
    return [];
  }
  const finding = {
    severity: "warning",
    rule: "dashboard-version-reference-drift",
    path: "docs/dashboard.md",
    message: `Dashboard docs reference v${taggedVersion}, but the current package version is v${AUDIT_VERSION}.`,
    suggestion: "Remove version-specific wording from docs/dashboard.md or update it during the release bump.",
  };
  return [finding];
}
/** Retired docs/skills.md phrases, each with the rule id and message reported when it is found. */
const SKILLS_DOC_STALE_PHRASES = [
  {
    needle: "MUST read all files before commenting",
    rule: "skills-review-contract-drift",
    message: "docs/skills.md still claims goat-review must read all files before commenting; the live skill uses diff-first review with explicit files-not-opened reporting.",
  },
  {
    needle: "10-category checklist",
    rule: "skills-security-contract-drift",
    message: "docs/skills.md still sells goat-security as a fixed 10-category checklist; the live skill uses repo-appropriate threat categories instead.",
  },
  {
    needle: "MUST rank findings by exploitability",
    rule: "skills-security-gate-drift",
    message: "docs/skills.md still claims exploitability ranking is a universal hard gate; the live skill only requires it in deeper threat-model work.",
  },
];
/**
 * Drift: docs/skills.md still contains retired contract phrases.
 *
 * @param skillsDoc - full text of docs/skills.md
 * @returns one warning per stale phrase present in the text, in table order; empty when none
 */
function driftSkillsDoc(skillsDoc) {
  const findings = [];
  for (const { needle, rule, message } of SKILLS_DOC_STALE_PHRASES) {
    if (!skillsDoc.includes(needle)) {
      continue;
    }
    findings.push({
      severity: "warning",
      rule,
      path: "docs/skills.md",
      message,
    });
  }
  return findings;
}
/**
 * Drift: glossary.md carries agent-specific or stale canonical pointers.
 *
 * Two independent checks run on the already-read text: the "Claude Search Optimization"
 * expansion of CSO, and a Ceremony or Router Table row that mentions CLAUDE.md later on the
 * same line. Each check contributes at most one warning; this helper performs no IO.
 *
 * @param glossary - full text of .goat-flow/glossary.md
 * @returns the warnings for whichever checks matched, CSO first; empty when neither matched
 */
function driftGlossary(glossary) {
  const expandsClaudeCso = glossary.includes("Claude Search Optimization");
  const rowsThroughClaudeMd = [
    /\|\s*Ceremony\s*\|.*CLAUDE\.md/u,
    /\|\s*Router Table\s*\|.*CLAUDE\.md/u,
  ];
  const pointsThroughClaudeMd = rowsThroughClaudeMd.some((row) => row.test(glossary));
  const csoFinding = {
    severity: "warning",
    rule: "glossary-cso-drift",
    path: ".goat-flow/glossary.md",
    message: "Glossary still expands CSO as Claude Search Optimization instead of using agent-neutral wording.",
  };
  const canonFinding = {
    severity: "warning",
    rule: "glossary-canonical-file-drift",
    path: ".goat-flow/glossary.md",
    message: "Glossary still points core concepts through CLAUDE.md instead of an agent-neutral canon.",
  };
  return [
    ...(expandsClaudeCso ? [csoFinding] : []),
    ...(pointsThroughClaudeMd ? [canonFinding] : []),
  ];
}
/**
 * Drift: setup/01-system-overview.md oversells session logs as durable memory.
 *
 * Each retired phrase found in the already-read text yields one warning, in the order the
 * phrases are listed below; this helper performs no IO.
 *
 * @param setupOverview - full text of workflow/setup/01-system-overview.md
 * @returns the warnings for every retired phrase present; empty when neither is present
 */
function driftSetupOverview(setupOverview) {
  const retiredPhrases = [
    {
      needle: "persistent memory across sessions",
      rule: "setup-memory-tier-drift",
      message: "Setup overview still sells goat-flow as persistent memory across sessions even though session logs/tasks are local gitignored continuity only.",
    },
    {
      needle: "preserve any useful content in `.goat-flow/logs/sessions/`",
      rule: "setup-session-log-tier-drift",
      message: "Setup overview still routes durable legacy content into session logs instead of lessons / footguns / decisions.",
    },
  ];
  return retiredPhrases
    .filter(({ needle }) => setupOverview.includes(needle))
    .map(({ rule, message }) => ({
      severity: "warning",
      rule,
      path: "workflow/setup/01-system-overview.md",
      message,
    }));
}
/**
 * Drift: ADR-020's status and the manifest disagree about Copilot support.
 *
 * The ADR counts as accepted when it contains a "**Status:** Accepted" marker; Copilot counts
 * as shipped when the manifest's `agents` has its own "copilot" key. A mismatch in either
 * direction yields one warning under the same rule id with direction-specific wording.
 *
 * @param decisionText - full text of the ADR-020 decision record
 * @returns one warning when ADR status and manifest disagree; an empty array when they agree
 */
function driftCopilotDecision(decisionText) {
  const shipsCopilot = Object.hasOwn(loadManifest().agents, "copilot");
  const adrAccepted = /\*\*Status:\*\*\s*Accepted/u.test(decisionText);
  if (adrAccepted === shipsCopilot) {
    return [];
  }
  const wording = adrAccepted
    ? {
        message: "ADR-020 still says Copilot support is accepted while the manifest-backed runtime supports only claude/codex/antigravity.",
        suggestion: "Either defer/revert ADR-020 or implement manifest/type/runtime Copilot parity in the same change.",
      }
    : {
        message: "ADR-020 no longer reflects the live manifest-backed runtime: Copilot is shipped in code but the ADR is not accepted.",
        suggestion: "Update ADR-020 to Accepted and align its decision text with the manifest-backed Copilot support.",
      };
  return [
    {
      severity: "warning",
      rule: "adr020-copilot-drift",
      path: ".goat-flow/learning-loop/decisions/ADR-020-add-copilot-cli.md",
      message: wording.message,
      suggestion: wording.suggestion,
    },
  ];
}
/**
 * Drift: ADR-013 still carries detail from the pre-simplification implementation.
 *
 * Any one of three markers triggers the warning: the "v0.9/v1.0" version pair, the
 * "agent-setup-checks.ts" file name, or the "17 build checks (7 project setup + 10 per-agent"
 * count.
 *
 * @param decisionText - full text of the ADR-013 decision record
 * @returns one warning when at least one marker is present; otherwise an empty array
 */
function driftScannerRemovalDecision(decisionText) {
  const staleMarkers = [
    /v0\.9\/v1\.0/u,
    /agent-setup-checks\.ts/u,
    /17 build checks \(7 project setup \+ 10 per-agent/u,
  ];
  const hasStaleDetail = staleMarkers.some((marker) => marker.test(decisionText));
  if (!hasStaleDetail) {
    return [];
  }
  const finding = {
    severity: "warning",
    rule: "adr013-stale-implementation-detail",
    path: ".goat-flow/learning-loop/decisions/ADR-013-remove-scanner-system.md",
    message: "ADR-013 still contains stale classifier states, file paths, or audit-count details from the pre-simplification implementation.",
    suggestion: "Refresh ADR-013 to describe the scanner removal decision without stale implementation-era counts and file names.",
  };
  return [finding];
}
/**
 * Run the targeted semantic-drift checks over the high-trust cold-path docs.
 *
 * Docs are read one at a time in a fixed order. A doc whose read yields null is skipped;
 * every other doc is counted as scanned and handed to its checks, whose findings are appended
 * in the order the checks are listed.
 *
 * @param ctx - audit context; its `fs` is used to read the docs here and is passed on to the
 *   checks that consult live source
 * @returns `findings`, the accumulated drift findings, and `filesScanned`, the number of
 *   distinct docs that were actually read
 */
export function scanSemanticDrift(ctx) {
  const findings = [];
  const scanned = new Set();
  /** Read one doc; when readable, record it as scanned and run its checks in order. */
  const scanDoc = (path, checks) => {
    const text = ctx.fs.readFile(path);
    if (text === null) {
      return;
    }
    scanned.add(path);
    for (const check of checks) {
      findings.push(...check(text));
    }
  };
  scanDoc(".goat-flow/code-map.md", [
    (text) => driftCodeMapClassifyState(text, ctx),
    (text) => driftCodeMapDashboardViews(text, ctx),
    (text) => driftSkillPlaybookInventory(".goat-flow/code-map.md", text, ctx),
  ]);
  scanDoc(".goat-flow/architecture.md", [
    (text) => driftSkillPlaybookInventory(".goat-flow/architecture.md", text, ctx),
  ]);
  scanDoc("docs/dashboard.md", [
    (text) => driftDashboardSessions(text, ctx),
    (text) => driftDashboardViewNames(text),
    (text) => driftDashboardIdleTimeout(text, ctx),
    (text) => driftDashboardRunners(text),
    (text) => driftDashboardVersionReference(text),
  ]);
  scanDoc("docs/skills.md", [(text) => driftSkillsDoc(text)]);
  scanDoc(".goat-flow/glossary.md", [(text) => driftGlossary(text)]);
  scanDoc("workflow/setup/01-system-overview.md", [(text) => driftSetupOverview(text)]);
  scanDoc(".goat-flow/learning-loop/decisions/ADR-020-add-copilot-cli.md", [
    (text) => driftCopilotDecision(text),
  ]);
  scanDoc(".goat-flow/learning-loop/decisions/ADR-013-remove-scanner-system.md", [
    (text) => driftScannerRemovalDecision(text),
  ]);
  return { findings, filesScanned: scanned.size };
}
//# sourceMappingURL=check-factual-semantic-drift.js.map