@blundergoat/goat-flow
Version:
AI coding agent harness and local dashboard for Claude Code, OpenAI Codex, Google Antigravity, and GitHub Copilot - setup audits, guardrails, structured skills, deny hooks, and persistent learning loops.
401 lines • 16.4 kB
JavaScript
/**
* Filesystem-facing layer of skill-quality scoring: discovers artifacts on disk, safely reads their
* content under byte caps, composes the scoring surface (preamble, conventions, and skill-local
* references), and provides the small text utilities (heading counts, frontmatter stripping, token
* estimate) the metric scorers call.
*
* This is the only module here that touches the filesystem, so the safety rules live here: symlinks
* are refused, every reference include is confined to its allowed root (no `..` escape), and uploads
* can disable disk scanning so a user-supplied name cannot leak on-disk content into the score.
* Reads are capped and truncate on UTF-8 character boundaries to keep composed sizes deterministic.
*/
import { closeSync, existsSync, lstatSync, openSync, readSync, readdirSync, realpathSync, statSync, } from "node:fs";
import { basename, dirname, isAbsolute, join, relative, resolve, } from "node:path";
import { loadQualityConfig, } from "./quality-config.js";
/**
 * Decide whether a path may be read: it must be stat-able and must not itself be a symbolic link.
 *
 * @param path - filesystem path to inspect; `lstat` examines the entry itself instead of following a link.
 * @returns true for a non-symlink entry; false for a symlink or when `lstat` fails for any
 *   reason (for example, the path has disappeared).
 */
function isSafeEntry(path) {
    // Start from "unsafe" so any lstat failure falls through to a false result.
    let isLink = true;
    try {
        isLink = lstatSync(path).isSymbolicLink();
    }
    catch {
        // Unreadable or missing entry: keep treating it as unsafe.
    }
    return !isLink;
}
/**
 * Turn a directory path into a single id-safe segment so path separators never appear in artifact ids.
 *
 * @param value - path text to sanitize (callers in this file pass the reference's directory name).
 * @returns the lower-cased segment with leading dots removed and every run of characters outside
 *   `a-z`, `0-9`, `_`, `-` collapsed to one `-`; `"reference-root"` when nothing is left.
 */
function referenceIdSegment(value) {
    // Drop a leading "./", "../" or bare dot prefix.
    const withoutDotPrefix = value.replace(/^\.+\/?/u, "");
    // Collapse separators and any other disallowed characters into single dashes.
    const dashed = withoutDotPrefix.replace(/[^a-z0-9_-]+/giu, "-");
    const trimmed = dashed.replace(/^-+|-+$/gu, "");
    const segment = trimmed.toLowerCase();
    return segment === "" ? "reference-root" : segment;
}
/**
 * Allocate a unique artifact id for a shared reference and record it in `usedIds`.
 *
 * @param candidate - reference record providing `name` and project-relative `path`.
 * @param nameCounts - map from reference name to how many candidates share that name.
 * @param usedIds - set of ids already taken; the returned id is added to it.
 * @returns `reference:<name>`, or `reference:<dir-segment>:<name>` when the name is shared by
 *   several candidates, with a numeric `:2`, `:3`, ... suffix appended until the id is unused.
 */
function referenceArtifactId(candidate, nameCounts, usedIds) {
    const occurrences = nameCounts.get(candidate.name) ?? 0;
    let baseId;
    if (occurrences > 1) {
        // Same name in several directories: qualify the id with the directory segment.
        baseId = `reference:${referenceIdSegment(dirname(candidate.path))}:${candidate.name}`;
    }
    else {
        baseId = `reference:${candidate.name}`;
    }
    let id = baseId;
    for (let suffix = 2; usedIds.has(id); suffix += 1) {
        id = `${baseId}:${suffix}`;
    }
    usedIds.add(id);
    return id;
}
/**
 * Work out the name a shared reference doc is registered under.
 *
 * Any file other than `README.md` is named after itself with a trailing `.md` removed. A
 * `README.md` is only accepted inside a directory called `skill-quality-testing`, where it takes
 * the directory's name; every other `README.md` is rejected.
 *
 * @param refDir - Directory holding the reference file.
 * @param filename - The reference file's basename.
 * @returns The reference name, or `null` when the file is not a shared reference.
 */
function sharedReferenceName(refDir, filename) {
    if (filename === "README.md") {
        const parentName = basename(refDir);
        return parentName === "skill-quality-testing" ? parentName : null;
    }
    return filename.replace(/\.md$/, "");
}
/**
 * Produce the synthetic playbook path that labels uploaded markdown scored as a reference.
 *
 * @param name - user-supplied artifact name; it is only interpolated into the returned string.
 * @returns `.goat-flow/skill-docs/playbooks/<name>.md`; this function performs no filesystem access.
 */
export function uploadedSharedReferencePath(name) {
    const fileName = `${name}.md`;
    return `.goat-flow/skill-docs/playbooks/${fileName}`;
}
/**
 * Express `target` relative to `projectRoot` using forward slashes only, so artifact records
 * render identically on Windows and POSIX. fs calls accept either separator; user-visible
 * paths (dashboard, JSON output, log entries) must not vary.
 *
 * @param projectRoot - base directory the result is relative to.
 * @param target - path to express relative to `projectRoot`.
 * @returns the relative path with every backslash replaced by `/`.
 */
function relPosix(projectRoot, target) {
    const nativeRelative = relative(projectRoot, target);
    return nativeRelative.split("\\").join("/");
}
/**
 * Record a discovered `SKILL.md` in the inventory, folding repeats of the same skill name into
 * the first record as mirror paths.
 *
 * @param projectRoot - project root used to make the stored path project-relative.
 * @param artifactsById - inventory map keyed by `skill:<name>`; mutated in place.
 * @param name - skill directory name.
 * @param skillFile - path of the skill's `SKILL.md`.
 * @param source - label of the walk root the skill was found under.
 */
function registerSkillArtifact(projectRoot, artifactsById, name, skillFile, source) {
    const id = `skill:${name}`;
    const path = relPosix(projectRoot, skillFile);
    const canonical = artifactsById.get(id);
    if (!canonical) {
        // First sighting of this skill: it becomes the canonical record.
        artifactsById.set(id, {
            id,
            name,
            path,
            kind: "skill",
            source,
            mirrorPaths: [],
            missingMirrors: [],
        });
        return;
    }
    // Later sightings under other roots are mirrors of the canonical record.
    const knownMirrors = canonical.mirrorPaths ?? [];
    canonical.mirrorPaths = [...knownMirrors, path];
}
/**
 * Annotate a skill artifact with the configured skill roots where its `SKILL.md` was not found.
 *
 * @param projectRoot - project root used to build the expected mirror paths.
 * @param artifact - artifact record; anything whose `kind` is not `"skill"` is returned as-is.
 * @param config - quality config; `walkRoots.skills` lists the roots every skill should exist under.
 * @returns a copy of the artifact with `mirrorPaths` defaulted to `[]` and `missingMirrors` set to
 *   the expected paths that match neither the artifact's own path nor any recorded mirror.
 */
function addMissingMirrorMetadata(projectRoot, artifact, config) {
    if (artifact.kind !== "skill") {
        return artifact;
    }
    const mirrorPaths = artifact.mirrorPaths ?? [];
    const knownPaths = new Set([artifact.path, ...mirrorPaths]);
    const missingMirrors = [];
    for (const { dir } of config.walkRoots.skills) {
        const expectedPath = relPosix(projectRoot, join(projectRoot, dir, artifact.name, "SKILL.md"));
        if (!knownPaths.has(expectedPath)) {
            missingMirrors.push(expectedPath);
        }
    }
    return { ...artifact, mirrorPaths, missingMirrors };
}
// eslint-disable-next-line complexity -- intentional because inventory walks multiple artifact roots and dedupes mirrored skills into one canonical artifact
/**
 * Build the artifact inventory: one record per skill (mirrors merged), then one per shared reference.
 *
 * @param projectRoot - project root that every configured walk root is resolved against.
 * @param config - quality config supplying `walkRoots.skills` and `walkRoots.references`;
 *   loaded from `projectRoot` when omitted.
 * @returns skill artifacts in discovery order followed by shared-reference artifacts. Symlinked
 *   skill directories, `SKILL.md` files and reference files are skipped.
 */
export function discoverArtifacts(projectRoot, config = loadQualityConfig(projectRoot)) {
    // Pass 1: skills. The same skill name under several roots collapses into one record.
    const skillsById = new Map();
    for (const root of config.walkRoots.skills) {
        const rootDir = join(projectRoot, root.dir);
        if (!existsSync(rootDir)) {
            continue;
        }
        for (const entry of readdirSync(rootDir, { withFileTypes: true })) {
            const skillDir = join(rootDir, entry.name);
            const isRealDirectory = entry.isDirectory() && isSafeEntry(skillDir);
            if (!isRealDirectory) {
                continue;
            }
            const skillFile = join(skillDir, "SKILL.md");
            if (existsSync(skillFile) && isSafeEntry(skillFile)) {
                registerSkillArtifact(projectRoot, skillsById, entry.name, skillFile, root.source);
            }
        }
    }
    const skills = [];
    for (const skill of skillsById.values()) {
        skills.push(addMissingMirrorMetadata(projectRoot, skill, config));
    }
    // Pass 2: shared references. Names are tallied while collecting so duplicates can be
    // disambiguated once the full candidate list is known.
    const candidates = [];
    const nameCounts = new Map();
    for (const { dir } of config.walkRoots.references) {
        const refDir = join(projectRoot, dir);
        if (!existsSync(refDir)) {
            continue;
        }
        for (const entry of readdirSync(refDir, { withFileTypes: true })) {
            if (!(entry.isFile() && entry.name.endsWith(".md"))) {
                continue;
            }
            const name = sharedReferenceName(refDir, entry.name);
            if (name === null) {
                continue;
            }
            const filePath = join(refDir, entry.name);
            if (!isSafeEntry(filePath)) {
                continue;
            }
            candidates.push({ name, path: relPosix(projectRoot, filePath) });
            nameCounts.set(name, (nameCounts.get(name) ?? 0) + 1);
        }
    }
    // Ids are allocated in candidate order; skill ids are reserved first.
    const usedIds = new Set(skills.map((skill) => skill.id));
    const references = candidates.map((candidate) => ({
        id: referenceArtifactId(candidate, nameCounts, usedIds),
        name: candidate.name,
        path: candidate.path,
        kind: "shared-reference",
        source: "shared-reference",
    }));
    return [...skills, ...references];
}
/**
 * Look up one artifact by id in a freshly discovered inventory.
 *
 * @param projectRoot - project root to run discovery against.
 * @param artifactId - id to match exactly against each artifact's `id`.
 * @param config - quality config; loaded from `projectRoot` when omitted.
 * @returns the first artifact with that id, or `null` when none matches.
 */
export function findArtifact(projectRoot, artifactId, config = loadQualityConfig(projectRoot)) {
    for (const artifact of discoverArtifacts(projectRoot, config)) {
        if (artifact.id === artifactId) {
            return artifact;
        }
    }
    return null;
}
/**
 * Containment check applied to resolved paths before a reference include is accepted.
 *
 * @param parent - directory that must contain the path.
 * @param child - path being checked.
 * @returns true when `child` equals `parent` or sits beneath it; false when the relative path is
 *   absolute or its first segment is `..`.
 */
function isPathWithin(parent, child) {
    const rel = relative(parent, child);
    if (rel === "") {
        return true;
    }
    if (isAbsolute(rel)) {
        return false;
    }
    // Only a first segment that is exactly ".." escapes; a name such as "..notes" does not.
    const climbsOut = rel === ".." || rel.startsWith("../") || rel.startsWith("..\\");
    return !climbsOut;
}
/**
 * Length of the longest prefix of `buffer[0..length)` that does not end inside a UTF-8 character.
 *
 * Looks back at most four bytes for the lead byte of the final character and drops that
 * character when its continuation bytes were cut off. Bytes that are not valid lead bytes are
 * left in place so malformed input decodes exactly as it would without this check.
 */
function completeUtf8PrefixLength(buffer, length) {
    const lowest = Math.max(0, length - 4);
    for (let index = length - 1; index >= lowest; index -= 1) {
        const byte = buffer[index];
        if ((byte & 0xc0) === 0x80) {
            continue; // continuation byte: keep walking back to its lead byte
        }
        let expected = 1;
        if (byte >= 0xf0 && byte <= 0xf7) {
            expected = 4;
        }
        else if (byte >= 0xe0 && byte <= 0xef) {
            expected = 3;
        }
        else if (byte >= 0xc0 && byte <= 0xdf) {
            expected = 2;
        }
        return index + expected > length ? index : length;
    }
    return length;
}
/**
 * Read at most `config.maxArtifactBytes` bytes from a regular, non-symlink file and decode them
 * as UTF-8.
 *
 * When the file is longer than the cap, the read is cut back to the last complete UTF-8
 * character, so the content never ends in a replacement character produced by the cap itself
 * and never re-encodes to more bytes than the cap.
 *
 * @param path - file to read.
 * @param config - quality config; `maxArtifactBytes` is floored and clamped to a non-negative cap.
 * @returns `{ content, truncated }`, where `truncated` is true when the file is larger than the
 *   applied cap; `null` when the path is missing, is a symbolic link, or is not a regular file.
 * @throws whatever `openSync` / `readSync` throw when the file cannot be opened or read.
 */
function readTextCapped(path, config) {
    let stats;
    try {
        // A single lstat supplies the symlink check, the file-type check and the size, so the
        // three facts describe the same entry and a path that vanishes yields null, not a throw.
        stats = lstatSync(path);
    }
    catch {
        return null;
    }
    if (stats.isSymbolicLink() || !stats.isFile()) {
        return null;
    }
    const maxBytes = Math.max(0, Math.floor(config.maxArtifactBytes));
    const bytesToRead = Math.min(stats.size, maxBytes);
    // Compare against the cap actually applied, not the raw config value.
    const truncated = stats.size > maxBytes;
    const buffer = Buffer.alloc(bytesToRead);
    const fd = openSync(path, "r");
    try {
        const bytesRead = readSync(fd, buffer, 0, bytesToRead, 0);
        const end = truncated ? completeUtf8PrefixLength(buffer, bytesRead) : bytesRead;
        return {
            content: buffer.subarray(0, end).toString("utf-8"),
            truncated,
        };
    }
    finally {
        closeSync(fd);
    }
}
/**
 * Resolve a reference named in a skill to a path under that skill's `references/` directory,
 * rejecting anything that would land outside it.
 *
 * @param skillDir - directory containing the skill's `SKILL.md`.
 * @param relativeRef - reference path as written in the skill, relative to `references/`.
 * @returns the resolved path (which may not exist yet), or `null` when the reference contains a
 *   NUL byte, escapes `references/` lexically or after resolving real paths, or `references/`
 *   itself is a symbolic link.
 */
function resolveSkillReferencePath(skillDir, relativeRef) {
    if (relativeRef.includes("\0")) {
        return null;
    }
    const root = resolve(skillDir, "references");
    const candidate = resolve(root, relativeRef);
    // Lexical check first: cheap, and it also covers paths that do not exist yet.
    if (!isPathWithin(root, candidate)) {
        return null;
    }
    const rootIsUnsafe = existsSync(root) && !isSafeEntry(root);
    if (rootIsUnsafe) {
        return null;
    }
    if (existsSync(candidate)) {
        // Existing targets are re-checked on their real paths so a link inside the tree
        // cannot point outside it.
        try {
            const realRoot = realpathSync(root);
            const realCandidate = realpathSync(candidate);
            if (!isPathWithin(realRoot, realCandidate)) {
                return null;
            }
        }
        catch {
            return null;
        }
    }
    return candidate;
}
/**
 * Load an artifact's text from disk under the configured byte cap.
 *
 * @param projectRoot - project root the artifact's relative path is joined to.
 * @param artifact - artifact record; only `path` is used.
 * @param config - quality config supplying `maxArtifactBytes`.
 * @returns `{ content, notes }`; content is `""` with no notes when the file cannot be read
 *   safely, and notes carries a single truncation message when the cap cut the file short.
 */
export function readArtifactContent(projectRoot, artifact, config) {
    const capped = readTextCapped(join(projectRoot, artifact.path), config);
    if (capped === null) {
        return { content: "", notes: [] };
    }
    const notes = [];
    if (capped.truncated) {
        notes.push(`artifact truncated at ${config.maxArtifactBytes} bytes`);
    }
    return { content: capped.content, notes };
}
/**
 * Read an optional composed-context file and keep only its text.
 *
 * @param path - file to read.
 * @param config - quality config passed through to the capped reader.
 * @returns the (possibly truncated) text, or `null` when the capped reader returns nothing.
 */
function readOptionalText(path, config) {
    const capped = readTextCapped(path, config);
    if (capped == null) {
        return null;
    }
    return capped.content ?? null;
}
/**
 * Size of a string once encoded as UTF-8, so dashboard upload limits line up with HTTP body limits.
 *
 * @param content - text to measure; the result counts encoded bytes, not UTF-16 code units.
 * @returns the UTF-8 byte count, the unit every cap in this module is expressed in.
 */
export function utf8ByteLength(content) {
    const encoding = "utf-8";
    return Buffer.byteLength(content, encoding);
}
/**
 * Cut text down to a UTF-8 byte budget without ever splitting a character.
 *
 * @param content - text to truncate; walked one Unicode code point at a time.
 * @param maxBytes - UTF-8 byte budget; floored, and negative values are treated as 0.
 * @returns the longest whole-character prefix that fits within `maxBytes`; "" when the budget is 0.
 */
export function truncateUtf8Bytes(content, maxBytes) {
    let remaining = Math.max(0, Math.floor(maxBytes));
    // Track how many UTF-16 units are kept, then slice once at the end.
    let keptUnits = 0;
    for (const char of content) {
        const size = utf8ByteLength(char);
        if (size > remaining) {
            break;
        }
        remaining -= size;
        keptUnits += char.length;
    }
    return content.slice(0, keptUnits);
}
// eslint-disable-next-line complexity -- intentional because composition assembles preamble, conventions, and skill-local references in a fixed pipeline; each branch is a distinct artifact-class case
/**
 * Assemble the text that is actually scored for an artifact.
 *
 * Shared references are scored as-is. Skills are composed, in this order, from: the configured
 * preamble file, the configured conventions file (only when the skill text mentions
 * `skill-conventions`), the skill text itself, each distinct reference matched by
 * `composition.skillReferencePattern`, and every other `.md` file sitting next to `SKILL.md`
 * (excluding `README.md`). Parts are joined with a `---` separator and the result is truncated
 * to `composition.maxComposedBytes`.
 *
 * @param projectRoot - project root that configured and artifact paths are joined to.
 * @param artifact - artifact record; `kind` and `path` are used.
 * @param rawContent - the artifact's own text.
 * @param config - quality config supplying the `composition` settings and read caps.
 * @param options - `scanDisk: false` skips the reference lookups and the sibling `.md` scan;
 *   the preamble and conventions files are still read.
 * @returns `{ raw, composed, sources, notes }`, where `sources` labels each included part in
 *   order and `notes` records a truncation message when the composed text exceeded the limit.
 */
export function composeArtifactContent(projectRoot, artifact, rawContent, config, options = {}) {
    if (artifact.kind === "shared-reference") {
        return {
            raw: rawContent,
            composed: rawContent,
            sources: [basename(artifact.path)],
            notes: [],
        };
    }
    const { composition } = config;
    // Each part keeps its text and its source label together so the two lists cannot drift.
    const parts = [];
    const include = (text, source) => {
        parts.push({ text, source });
    };
    const preamblePath = composition.skillPreamblePath;
    if (preamblePath) {
        const preamble = readOptionalText(join(projectRoot, preamblePath), config);
        if (preamble !== null) {
            include(preamble, basename(preamblePath));
        }
    }
    const conventionsPath = composition.skillConventionsPath;
    if (conventionsPath && /skill-conventions/i.test(rawContent)) {
        const conventions = readOptionalText(join(projectRoot, conventionsPath), config);
        if (conventions !== null) {
            include(conventions, basename(conventionsPath));
        }
    }
    include(rawContent, "SKILL.md");
    if (options.scanDisk !== false) {
        const skillDir = dirname(join(projectRoot, artifact.path));
        const referencePattern = new RegExp(composition.skillReferencePattern, "g");
        const requested = new Set();
        for (const match of rawContent.matchAll(referencePattern)) {
            const relativeRef = match[1];
            if (!relativeRef || requested.has(relativeRef)) {
                continue;
            }
            requested.add(relativeRef);
            const refPath = resolveSkillReferencePath(skillDir, relativeRef);
            const refText = refPath === null ? null : readOptionalText(refPath, config);
            if (refText !== null) {
                include(refText, `references/${relativeRef}`);
            }
        }
        try {
            for (const entry of readdirSync(skillDir, { withFileTypes: true })) {
                const isExtraMarkdown = entry.isFile() &&
                    entry.name.endsWith(".md") &&
                    entry.name !== "SKILL.md" &&
                    entry.name !== "README.md";
                if (!isExtraMarkdown) {
                    continue;
                }
                const filePath = join(skillDir, entry.name);
                if (!isSafeEntry(filePath)) {
                    continue;
                }
                const siblingText = readOptionalText(filePath, config);
                if (siblingText !== null) {
                    include(siblingText, entry.name);
                }
            }
        }
        catch {
            // Directory unreadable: ignore - composition continues with what we have.
        }
    }
    const sources = parts.map((part) => part.source);
    const notes = [];
    const composed = parts.map((part) => part.text).join("\n\n---\n\n");
    const limit = composition.maxComposedBytes;
    if (utf8ByteLength(composed) <= limit) {
        return { raw: rawContent, composed, sources, notes };
    }
    notes.push(`composition truncated at ${Math.round(limit / 1024)}KB`);
    return {
        raw: rawContent,
        composed: truncateUtf8Bytes(composed, limit),
        sources,
        notes,
    };
}
/**
 * Count Markdown headings of one exact level so rubric section counts stay deterministic.
 *
 * @param content - Markdown text, split on `\n`; a line counts only when it starts with the
 *   exact run of `#` characters followed by a space.
 * @param level - heading depth to count (1 for `# `, 2 for `## `); other depths are ignored.
 * @returns the number of headings at exactly that level; 0 when none match (not an error).
 */
export function countHeadings(content, level) {
    const marker = `${"#".repeat(level)} `;
    let total = 0;
    for (const line of content.split("\n")) {
        if (line.startsWith(marker)) {
            total += 1;
        }
    }
    return total;
}
/**
 * Centralise section checks so rubric regexes stay scoped to Markdown content.
 *
 * A regex carrying the `g` or `y` flag remembers where its previous match ended (`lastIndex`),
 * so calling `test` repeatedly on a shared pattern would otherwise start mid-string and give
 * alternating answers. For such patterns the scan is forced to start at index 0, and
 * `lastIndex` is reset afterwards so no state leaks back to the caller.
 *
 * @param content - artifact text to test the section pattern against.
 * @param pattern - caller-owned regex; its flags (case, multiline) are respected as-is.
 * @returns true when the pattern matches the content, independent of earlier calls.
 */
export function hasSection(content, pattern) {
    const isStateful = pattern.global || pattern.sticky;
    if (!isStateful) {
        return pattern.test(content);
    }
    pattern.lastIndex = 0;
    const found = pattern.test(content);
    pattern.lastIndex = 0;
    return found;
}
/**
 * Drop a leading frontmatter block before tool-keyword scoring so version metadata cannot earn credit.
 *
 * @param content - artifact text that may open with a `---` fenced YAML frontmatter block.
 * @returns the text after the leading frontmatter block; the input unchanged when it does not
 *   start with one.
 */
export function stripYamlFrontmatter(content) {
    // Anchored at the very start of the text, so a match always begins at index 0.
    const leadingBlock = /^---\s*\n[\s\S]*?\n---\s*\n?/u.exec(content);
    if (leadingBlock === null) {
        return content;
    }
    return content.slice(leadingBlock[0].length);
}
/**
 * Approximate the token cost of text for budget scoring without running a tokenizer.
 *
 * @param content - text whose token cost is being approximated for the token-budget metric.
 * @returns the string length divided by four, rounded up; an estimate, not an exact count.
 */
export function estimateTokens(content) {
    const charsPerToken = 4;
    const charCount = content.length;
    return Math.ceil(charCount / charsPerToken);
}
/**
 * Count the Markdown reference files that sit directly inside a skill's `references/` directory.
 *
 * Only regular `.md` files are counted: sub-directories and symbolic links are ignored, and a
 * `references/` entry that is itself a symbolic link counts as empty, in line with
 * `resolveSkillReferencePath`, which refuses to read through a symlinked references root.
 *
 * @param projectRoot - project root the artifact's relative path is joined to.
 * @param artifact - artifact record; anything whose `kind` is not `"skill"` has no sub-references.
 * @returns the number of regular `.md` files in `references/`; 0 when the directory is missing,
 *   is not a real directory, or cannot be listed.
 */
export function countSubReferences(projectRoot, artifact) {
    if (artifact.kind !== "skill") {
        return 0;
    }
    const referencesDir = join(projectRoot, dirname(artifact.path), "references");
    try {
        // lstat rather than stat: a symlink reports isDirectory() === false here.
        if (!lstatSync(referencesDir).isDirectory()) {
            return 0;
        }
        // Dirent.isFile() is false for symlinks and directories, so no per-entry lstat is needed.
        return readdirSync(referencesDir, { withFileTypes: true })
            .filter((entry) => entry.isFile() && entry.name.endsWith(".md")).length;
    }
    catch {
        // Missing or unreadable directory: there is nothing to count.
        return 0;
    }
}
//# sourceMappingURL=skill-quality-content.js.map