@blundergoat/goat-flow
Version:
AI coding agent harness and local dashboard for Claude Code, OpenAI Codex, Google Antigravity, and GitHub Copilot - setup audits, guardrails, structured skills, deny hooks, and persistent learning loops.
405 lines • 18.4 kB
JavaScript
/** Strict YYYY-MM-DD format - rejects full ISO 8601 timestamps in `last_reviewed`. */
export const ISO_DATE_REGEX = /^\d{4}-\d{2}-\d{2}$/;
/**
* Matches file path evidence in multiple formats:
* - `src/auth.ts` (backtick-wrapped file path)
* - `src/auth.ts:42` (backtick-wrapped with line number)
* - `src/auth.ts:42-50` (backtick-wrapped with line range)
* - (lines 866-880) or (line 52) (prose-style)
* Line numbers are discouraged per ADR-024; flagged for cleanup when found alongside a semantic anchor.
* File paths alone remain valid evidence.
*/
export const EVIDENCE_PATTERN = /`[^`]+\.[a-zA-Z]{1,10}(?::[0-9]+(?:[-,][0-9]+)*)?`|\(lines?\s+[0-9]+/;
/** Regex to extract file paths from backtick-wrapped references (with optional line numbers). */
export const FILE_REF_REGEX = /`([^`]+\.[a-zA-Z]{1,10})(?::[0-9]+(?:[-,][0-9]+)*)?`/g;
/** Matches backtick and double-quoted `(search: ...)` evidence anchors. */
const SEARCH_ANCHOR_REGEX = /`([^`]+\.[a-zA-Z0-9]{1,10})`\s*\(search:\s*(?:`([^`]+)`|"((?:\\.|[^"\\])*)")\)/g;
const BARE_EVIDENCE_ANCHOR_LINE_REGEX = /(?:^|\s)(?:\*\*)?Evidence anchors?(?:\*\*)?:/i;
/**
* Decide whether a backtick-wrapped reference names a real file path rather than a
* URL or hostname (which share the `host:port` shape). Used to gate staleness
* checks so a `localhost:3000`-style token is never treated as a missing file.
*
* @param filePath - candidate reference text with any trailing `:line` already split off
* @returns true for paths with a slash or a root-level filename extension; false for URLs, hostnames, and bare extensionless names
*/
export function isFileRef(filePath) {
// Skip hostname/URL patterns (not file references)
if (/^https?:|:\/\//.test(filePath) ||
/^(localhost|127\.0\.0\.1|0\.0\.0\.0)$/i.test(filePath))
return false;
// Paths with '/' are clearly file paths
if (filePath.includes("/"))
return true;
// Root-level files with extensions (e.g., AGENTS.md:42) are valid refs
// Bare names without extensions (e.g., webpack:123) are ambiguous - skip
return /\.[a-zA-Z0-9]+$/.test(filePath);
}
/** Check whether a file reference can be validated for staleness without guessing. */
/** Paths under these dirs are intentionally gitignored per `.goat-flow/plans/.gitignore`
* (milestone files + plan subdirs + `.active` marker are local-session state by
* design). References to them in lessons/footguns are navigation pointers,
* not resolvable artifacts - treating absence as "stale" false-positives on
* any clean checkout or CI run. Keep this list short and specific. */
function isIntentionallyGitignored(filePath) {
return (filePath.startsWith(".goat-flow/plans/") ||
filePath.startsWith(".goat-flow/scratchpad/") ||
filePath.startsWith(".goat-flow/logs/"));
}
/** Check whether the file path is checkable for staleness. */
function isCheckableForStaleness(filePath, fs) {
if (isIntentionallyGitignored(filePath))
return false;
if (filePath.includes("/"))
return true;
// If it exists at root, it's checkable regardless of extension
if (fs.exists(filePath))
return true;
// A bare source filename that doesn't exist at the repo root is probably
// shorthand for a deeply nested file; skip it to avoid false positives.
if (/\.(go|ts|tsx|js|jsx|py|php|rs|java|kt|rb|cs|c|cpp|h|hpp|swift|scala)$/i.test(filePath))
return false;
// Non-source files (AGENTS.md, package.json, etc.) should be at root
return true;
}
/** Normalize a surface path so trailing slashes do not affect comparisons. */
function normalizeSurfacePath(path) {
return path.replace(/\/$/, "");
}
/**
* Find learning-loop artifact surfaces that exist on disk but sit outside the
* configured canonical location - the signal that a project is splitting one
* concern across two directories. Returns nothing unless a canonical path is
* actually present, so a project that simply hasn't adopted the surface yet is
* not flagged. Trailing slashes are normalized before comparison.
*
* @param fs - read-only filesystem adapter for the target project
* @param canonicalPaths - the configured/blessed locations; at least one must exist or the result is empty
* @param knownPaths - candidate surfaces to test against the canonical set
* @returns existing non-canonical paths, sorted lexicographically for deterministic output; empty when none compete
*/
export function findCompetingArtifactSurfaces(fs, canonicalPaths, knownPaths) {
if (!canonicalPaths.some((path) => fs.exists(path)))
return [];
const canonicalSet = new Set(canonicalPaths.map(normalizeSurfacePath));
return knownPaths
.filter((path) => !canonicalSet.has(normalizeSurfacePath(path)))
.filter((path) => fs.exists(path))
.sort((a, b) => a.localeCompare(b));
}
/**
* Read a learning-loop location into a stable, sorted set of markdown entries.
* Handles both config shapes uniformly: a directory (every `.md` except the
* README.md/INDEX.md metadata files, sorted lexicographically) and a single flat
* `.md` file (one entry). INDEX.md is generated bucket metadata (`goat-flow index`),
* not entry content - including it would count phantom legacy entries and force
* entry frontmatter onto a generated file. The sort is load-bearing - downstream
* entry ordering and report output must be deterministic across machines, so
* directory listing order is never trusted.
*
* @param fs - read-only filesystem adapter for the target project
* @param dir - directory path, or a single `.md` file path for flat-file config mode
* @returns the location with its existence flag and entries; files is empty when the location is absent or unreadable
*/
export function listMarkdownEntries(fs, dir) {
// Flat-file mode: config points at a single .md file instead of a directory
if (dir.endsWith(".md")) {
const exists = fs.exists(dir);
const content = exists ? fs.readFile(dir) : null;
const files = content !== null ? [{ path: dir, content }] : [];
return { path: dir, exists, files };
}
const exists = fs.exists(dir);
const files = exists
? fs
.listDir(dir)
.filter((file) => file.endsWith(".md") && file !== "README.md" && file !== "INDEX.md")
.sort((a, b) => a.localeCompare(b))
.flatMap((file) => {
const path = dir.endsWith("/") ? `${dir}${file}` : `${dir}/${file}`;
const content = fs.readFile(path);
if (content === null)
return [];
return [{ path, content }];
})
: [];
return { path: dir, exists, files };
}
/**
* Separate a leading `---`-delimited YAML frontmatter block from the markdown body.
* Recognizes frontmatter only at the very start of the content; a `---` later in
* the document is left in the body untouched.
*
* @param content - raw markdown file content
* @returns the frontmatter text without its `---` fences (null when there is none) and the remaining body
*/
export function parseMarkdownFrontmatter(content) {
const match = content.match(/^---\n([\s\S]*?)\n---\n?([\s\S]*)$/);
if (!match)
return { frontmatter: null, body: content };
return { frontmatter: match[1] ?? "", body: match[2] ?? "" };
}
/**
* Parse simple `key: value` pairs from a YAML frontmatter block.
* Only handles flat scalar fields (sufficient for goat-flow's single-level frontmatter);
* nested structures, arrays, and multi-line scalars are intentionally unsupported.
*
* @param frontmatter - YAML frontmatter body without the surrounding `---` markers
* @returns flat key/value fields parsed from the frontmatter block
*/
export function parseFrontmatterFields(frontmatter) {
const fields = {};
for (const line of frontmatter.split("\n")) {
const match = line.match(/^([a-zA-Z_][a-zA-Z0-9_]*):\s*(.*?)\s*$/);
if (!match || match[1] === undefined)
continue;
fields[match[1]] = match[2] ?? "";
}
return fields;
}
/**
* Compute days-since-review and a coarse freshness band for a bucket file.
* Returns `unknown` for missing or non-YYYY-MM-DD values so callers can flag them.
*
* @param lastReviewed - ISO date from bucket frontmatter, or null when absent
* @param now - comparison clock for deterministic tests and reports
*/
export function computeFreshness(lastReviewed, now = new Date()) {
if (lastReviewed === null || !ISO_DATE_REGEX.test(lastReviewed)) {
return { days: null, band: "unknown" };
}
const reviewedMs = Date.parse(`${lastReviewed}T00:00:00Z`);
if (Number.isNaN(reviewedMs))
return { days: null, band: "unknown" };
const todayMs = Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate());
const days = Math.max(0, Math.floor((todayMs - reviewedMs) / 86400000));
if (days <= 30)
return { days, band: "fresh" };
if (days <= 90)
return { days, band: "aging" };
return { days, band: "stale" };
}
/**
* Count how many times a pattern matches across a string. Pass a global (`/g`)
* regex - `matchAll` requires it, and without the flag the match count is not what
* a caller expects.
*
* @param content - text to scan
* @param pattern - global regular expression; non-global patterns will throw under matchAll
* @returns the total number of non-overlapping matches; 0 when none match
*/
export function countMatches(content, pattern) {
return Array.from(content.matchAll(pattern)).length;
}
/**
* Remove `~~...~~` strikethrough spans before evidence is scanned, so a reference
* an author has struck through (marked as historical) is not counted as live
* evidence. Run this first in every reference check; otherwise retired anchors
* resurface as findings.
*
* @param content - markdown that may contain strikethrough spans, including multi-line ones
* @returns the content with all strikethrough spans removed
*/
export function stripStrikethrough(content) {
return content.replace(/~~[\s\S]*?~~/g, "");
}
/** Check a concrete file:line reference for out-of-bounds lines or missing anchors. */
function getLineRefDiagnostic(fs, filePath, rawLines, hasSemanticAnchor) {
const lineCount = fs.lineCount(filePath);
const lineNumbers = Array.from(rawLines.matchAll(/[0-9]+/g)).flatMap((lineMatch) => {
const value = Number.parseInt(lineMatch[0], 10);
return Number.isNaN(value) ? [] : [value];
});
const ref = `${filePath}:${rawLines}`;
if (lineNumbers.some((lineNumber) => lineNumber < 1 || lineNumber > lineCount)) {
return ref;
}
if (!hasSemanticAnchor)
return `${ref} (missing semantic anchor)`;
return lineNumbers.length > 0
? `${ref} (line ref redundant, semantic anchor exists)`
: null;
}
/**
* Validate every file reference in one footgun section and tally the result.
* Reports a path as stale when the file no longer exists, and flags a `file:line`
* reference when the line is out of bounds, lacks a semantic anchor, or carries a
* line number made redundant by an anchor (the ADR-024 anchor-over-line-number
* contract). Strikethrough is stripped first so struck evidence is ignored.
*
* @param fs - read-only filesystem adapter used to resolve and line-count referenced files
* @param content - the footgun section's markdown
* @returns counts plus the stale-path and invalid-line-reference lists; all empty when every reference is valid
*/
export function summarizeFootgunRefs(fs, content) {
const summary = {
staleRefs: [],
invalidLineRefs: [],
totalRefs: 0,
validRefs: 0,
};
const cleanedContent = stripStrikethrough(content);
for (const line of cleanedContent.split("\n")) {
const hasSemanticAnchor = new RegExp(SEARCH_ANCHOR_REGEX.source).test(line);
const fileRefs = line.matchAll(/`([^`]+):([0-9]+(?:[-,][0-9]+)*)`/g);
for (const match of fileRefs) {
const filePath = match[1];
const rawLines = match[2];
if (filePath === undefined ||
rawLines === undefined ||
!isFileRef(filePath) ||
!isCheckableForStaleness(filePath, fs))
continue;
summary.totalRefs++;
if (!fs.exists(filePath)) {
summary.staleRefs.push(`${filePath}:${rawLines}`);
continue;
}
const diagnostic = getLineRefDiagnostic(fs, filePath, rawLines, hasSemanticAnchor);
if (diagnostic !== null) {
summary.invalidLineRefs.push(diagnostic);
continue;
}
summary.validRefs++;
}
}
scanBareEvidenceAnchors(fs, cleanedContent, summary);
scanSearchAnchors(fs, cleanedContent, summary);
return summary;
}
/** Bare `Evidence anchors:` path references are durable evidence and must not go stale silently. */
function scanBareEvidenceAnchors(fs, cleanedContent, summary) {
for (const line of cleanedContent.split("\n")) {
if (!BARE_EVIDENCE_ANCHOR_LINE_REGEX.test(line))
continue;
for (const match of line.matchAll(new RegExp(FILE_REF_REGEX.source, "g"))) {
const filePath = checkableBareEvidenceAnchorPath(fs, line, match);
if (filePath === null)
continue;
summary.totalRefs++;
if (fs.exists(filePath)) {
summary.validRefs++;
}
else {
summary.staleRefs.push(filePath);
}
}
}
}
function checkableBareEvidenceAnchorPath(fs, line, match) {
const filePath = match[1];
if (filePath === undefined)
return null;
if (/[*?{}<>]|\.\.\./.test(filePath))
return null;
if (/:[0-9]+/.test(match[0]))
return null;
if (isFollowedBySearchAnchor(line, match))
return null;
if (!isFileRef(filePath))
return null;
if (!isCheckableForStaleness(filePath, fs))
return null;
return filePath;
}
function isFollowedBySearchAnchor(line, match) {
const matchIndex = match.index;
if (matchIndex === undefined)
return false;
return line
.slice(matchIndex + match[0].length)
.trimStart()
.startsWith("(search:");
}
/** `(search: "<needle>")` anchors: confirm the literal string still appears in
* the referenced file. A stale anchor is the mechanism that lets retired-code
* footguns pass validation while pointing at code that no longer exists. */
function scanSearchAnchors(fs, cleanedContent, summary) {
const searchAnchors = cleanedContent.matchAll(new RegExp(SEARCH_ANCHOR_REGEX.source, "g"));
for (const match of searchAnchors) {
const anchor = searchAnchorFromMatch(match);
if (anchor === null ||
!isFileRef(anchor.filePath) ||
!isCheckableForStaleness(anchor.filePath, fs))
continue;
summary.totalRefs++;
if (!fs.exists(anchor.filePath)) {
summary.staleRefs.push(`${anchor.filePath} (search: \`${anchor.needle}\`)`);
continue;
}
const fileContent = fs.readFile(anchor.filePath);
if (fileContent === null || !fileContent.includes(anchor.needle)) {
summary.staleRefs.push(`${anchor.filePath} (search: \`${anchor.needle}\`)`);
continue;
}
summary.validRefs++;
}
}
function searchAnchorFromMatch(match) {
const filePath = match[1];
const rawNeedle = match[2] ?? match[3];
if (filePath === undefined || rawNeedle === undefined)
return null;
return { filePath, needle: rawNeedle.replace(/\\(["\\])/g, "$1") };
}
/**
* Validate the file references in one lesson or pattern section, sharing the same
* staleness and ADR-024 line-reference rules as footguns. Lessons cite full
* project-rooted paths (src/, lib/, docs/, .goat-flow/, ...), so this matches that
* prefix grammar and skips glob-like or `...`-elided tokens that cannot be resolved
* to a single file.
*
* @param fs - read-only filesystem adapter used to resolve and line-count referenced files
* @param content - the lesson or pattern section's markdown
* @returns counts plus the stale-path and invalid-line-reference lists; all empty when every reference is valid
*/
export function summarizeLessonRefs(fs, content) {
const summary = {
staleRefs: [],
invalidLineRefs: [],
totalRefs: 0,
validRefs: 0,
};
const pathPattern = /`((?:src|config|app|apps|lib|docs|scripts|setup|workflow|agents|\.goat-flow)\/[^`]+)`/g;
const cleanedContent = stripStrikethrough(content);
for (const match of cleanedContent.matchAll(pathPattern)) {
const ref = match[1];
if (ref === undefined || /[*?{}<>]|\.\.\./.test(ref))
continue;
const filePath = ref.replace(/:[0-9]+(?:[-,][0-9]+)*$/, "");
if (isIntentionallyGitignored(filePath))
continue;
summary.totalRefs++;
if (fs.exists(filePath)) {
summary.validRefs++;
}
else {
summary.staleRefs.push(filePath);
}
}
summary.invalidLineRefs.push(...collectInvalidLessonLineRefs(fs, cleanedContent));
scanSearchAnchors(fs, cleanedContent, summary);
return summary;
}
/** Validate `file:line` refs in lesson content, returning out-of-bounds or anchorless refs. */
function collectInvalidLessonLineRefs(fs, cleanedContent) {
const invalid = [];
for (const line of cleanedContent.split("\n")) {
const hasSemanticAnchor = new RegExp(SEARCH_ANCHOR_REGEX.source).test(line);
for (const match of line.matchAll(/`([^`]+):([0-9]+(?:[-,][0-9]+)*)`/g)) {
const filePath = match[1];
const rawLines = match[2];
if (filePath === undefined ||
rawLines === undefined ||
!isFileRef(filePath) ||
!isCheckableForStaleness(filePath, fs) ||
!fs.exists(filePath))
continue;
const diagnostic = getLineRefDiagnostic(fs, filePath, rawLines, hasSemanticAnchor);
if (diagnostic !== null)
invalid.push(diagnostic);
}
}
return invalid;
}
//# sourceMappingURL=learning-loop-common.js.map