UNPKG

@blundergoat/goat-flow

Version:

AI coding agent harness and local dashboard for Claude Code, OpenAI Codex, Google Antigravity, and GitHub Copilot - setup audits, guardrails, structured skills, deny hooks, and persistent learning loops.

294 lines 12.1 kB
import { hasSection } from "./skill-quality-content.js";

const SUBTYPE_NAME_MATCH_SCORE = 5; // Threshold: name match outweighs two heading hits so canonical skills keep their calibrated subtype.
const SUBTYPE_HEADING_MATCH_SCORE = 2;
const SUBTYPE_FALLBACK_SCORE = 1;
const FALLBACK_ONLY_CONFIDENCE = 0.3;

/**
 * Pick the subtype reported when nothing scored: "playbook" for
 * shared-reference artifacts, "workflow" for every other kind.
 *
 * @param kind - the artifact's `kind` field.
 * @returns the fallback subtype name.
 */
function fallbackSubtypeForKind(kind) {
  return kind === "shared-reference" ? "playbook" : "workflow";
}

/**
 * Detect fallback-only matches so confidence stays visibly low.
 *
 * @param match - a scored match (`{ subtype, score, reasoning }`).
 * @returns true when the score equals SUBTYPE_FALLBACK_SCORE and at least one
 *   reasoning entry contains the word "fallback".
 */
function isFallbackOnlyMatch(match) {
  return (
    match.score === SUBTYPE_FALLBACK_SCORE &&
    match.reasoning.some((reason) => reason.includes("fallback"))
  );
}

/**
 * Turn the top two matches into a confidence value.
 *
 * @param top - the highest-scoring match.
 * @param second - the runner-up, or undefined when only one subtype matched.
 * @returns FALLBACK_ONLY_CONFIDENCE for a fallback-only top match, 1 when there
 *   is no runner-up, otherwise `top.score / (top.score + second.score)`.
 */
function subtypeConfidence(top, second) {
  if (isFallbackOnlyMatch(top)) return FALLBACK_ONLY_CONFIDENCE;
  return second === undefined ? 1 : top.score / (top.score + second.score);
}

/**
 * Score a single subtype's detection against an artifact. Returns 0 when the
 * subtype is incompatible (wrong kind or vetoed by a `mustNotHave` heading).
 *
 * Scoring rules:
 * - Name match: +SUBTYPE_NAME_MATCH_SCORE (unconditional - preserves the
 *   OR semantics where `goat-security` is a report regardless of Step 0).
 * - Heading match: +SUBTYPE_HEADING_MATCH_SCORE per match, vetoed by
 *   `mustNotHave`. Heading-only matches that trigger `mustNotHave` return 0.
 * - Empty rules (fallback subtype): SUBTYPE_FALLBACK_SCORE so the fallback
 *   always matches with low confidence.
 *
 * @param artifact - record whose `kind` and `name` are compared to the rules.
 * @param content - raw text tested against heading and must-not-have patterns.
 * @param detection - rules: `kinds`, `namePatterns`, `headingPatterns`, `mustNotHave`.
 * @param subtype - subtype name echoed back in the result.
 * @returns `{ subtype, score, reasoning }`; `reasoning` lists each rule that fired.
 */
// eslint-disable-next-line complexity -- intentional because subtype match scoring exhausts kind compatibility, fallback rules, name-vs-heading scoring, and mustNotHave veto in one place to keep priority semantics local
function scoreSubtypeMatch(artifact, content, detection, subtype) {
  const reasoning = [];
  if (detection.kinds.length > 0 && !detection.kinds.includes(artifact.kind)) {
    return { subtype, score: 0, reasoning };
  }
  if (
    detection.namePatterns.length === 0 &&
    detection.headingPatterns.length === 0
  ) {
    reasoning.push(`fallback for kind=${artifact.kind}`);
    return { subtype, score: SUBTYPE_FALLBACK_SCORE, reasoning };
  }

  let score = 0;
  // Name patterns are compared by exact equality, not as regular expressions.
  const nameMatched = detection.namePatterns.includes(artifact.name);
  if (nameMatched) {
    score += SUBTYPE_NAME_MATCH_SCORE;
    reasoning.push(`name "${artifact.name}" in name-patterns`);
  }
  for (const pattern of detection.headingPatterns) {
    if (new RegExp(pattern, "i").test(content)) {
      score += SUBTYPE_HEADING_MATCH_SCORE;
      reasoning.push(`heading "${pattern}" present`);
    }
  }
  if (score === 0) return { subtype, score: 0, reasoning };

  // A positive score without a name match can only have come from headings,
  // which is the one case the must-not-have veto applies to.
  if (!nameMatched) {
    for (const veto of detection.mustNotHave) {
      if (new RegExp(veto, "i").test(content)) {
        reasoning.push(`vetoed by must-not-have "${veto}"`);
        return { subtype, score: 0, reasoning };
      }
    }
  }
  return { subtype, score, reasoning };
}

const REFERENCE_DETECTION_ORDER = ["meta", "index", "playbook"];
const SKILL_DETECTION_ORDER = ["dispatcher", "report", "workflow"];

/**
 * Classify an artifact across all candidate subtypes. The detected subtype
 * is the highest-scoring match; confidence is `top / (top + second)` to
 * communicate how dominant the leading subtype is.
 *
 * @param artifact - inventory record; its `kind` selects the fixed subtype detection order.
 * @param content - raw artifact text matched against name/heading patterns and must-not-have vetoes.
 * @param config - quality config supplying each subtype's detection rules and profiles.
 * @returns the winning subtype with confidence and the ranked alternatives kept for reviewer
 *   context; confidence near 0.3 flags a fallback-only match that needs human subtype review.
 */
export function classifyArtifact(artifact, content, config) {
  const order =
    artifact.kind === "shared-reference"
      ? REFERENCE_DETECTION_ORDER
      : SKILL_DETECTION_ORDER;
  // Array.prototype.sort is stable, so equal scores keep detection order.
  const [top, ...rest] = order
    .map((subtype) =>
      scoreSubtypeMatch(
        artifact,
        content,
        config.subtypes[subtype].detection,
        subtype,
      ),
    )
    .filter((match) => match.score > 0)
    .sort((a, b) => b.score - a.score);

  if (top === undefined) {
    const fallback = fallbackSubtypeForKind(artifact.kind);
    return {
      detectedSubtype: fallback,
      confidence: 0,
      alternatives: [],
      reasoning: [
        `no subtype matched ${artifact.id}; using ${fallback} as fallback`,
      ],
    };
  }

  return {
    detectedSubtype: top.subtype,
    confidence: subtypeConfidence(top, rest[0]),
    alternatives: rest.map(({ subtype, score }) => ({ subtype, score })),
    reasoning: [
      `detected ${top.subtype} (score ${top.score}): ${top.reasoning.join("; ")}`,
      ...rest.map(
        (m) =>
          `also matched ${m.subtype} (score ${m.score}): ${m.reasoning.join("; ")}`,
      ),
    ],
  };
}

const SHAPE_DETECTION_ORDER = [
  "dispatcher",
  "report",
  "workflow",
  "playbook",
  "index",
  "meta",
];
const MIN_SHAPE_SCORE = 3; // Threshold: one strong signal or two heading signals required before shape mismatch can fire.

/**
 * Count pattern hits when repeated tool or step references are meaningful signals.
 *
 * @param content - text to scan.
 * @param pattern - RegExp carrying the `g` flag (required by `matchAll`).
 * @returns the number of matches of `pattern` in `content`.
 */
function countRegexMatches(content, pattern) {
  return Array.from(content.matchAll(pattern)).length;
}

/**
 * Sum the values of the signals that fired and collect their reasons.
 *
 * @param subtype - subtype name echoed back in the result.
 * @param signals - list of `[matched, value, reason]` tuples.
 * @returns `{ subtype, score, reasoning }` built from the tuples whose `matched` is truthy.
 */
function scoreFromSignals(subtype, signals) {
  let score = 0;
  const reasoning = [];
  for (const [matched, value, reason] of signals) {
    if (!matched) continue;
    score += value;
    reasoning.push(reason);
  }
  return { subtype, score, reasoning };
}

/**
 * Score how strongly an artifact's content resembles one subtype's shape.
 * Each subtype has its own signal table; any subtype other than dispatcher,
 * report, workflow, playbook, or index is scored with the meta table.
 *
 * Content scans are evaluated inside the branch that uses them, so a call
 * only pays for the signals belonging to the requested subtype.
 *
 * @param artifact - record whose `name` feeds the name-based signals.
 * @param content - raw artifact text scanned for headings and keywords.
 * @param subtype - subtype whose signal table should be applied.
 * @returns `{ subtype, score, reasoning }` from `scoreFromSignals`.
 */
// eslint-disable-next-line complexity -- intentional because semantic shape has separate signal sets per supported subtype; splitting would obscure the scoring table
function scoreShapeMatch(artifact, content, subtype) {
  switch (subtype) {
    case "dispatcher":
      return scoreFromSignals(subtype, [
        [artifact.name === "goat", 5, 'name "goat"'],
        [hasSection(content, /##\s+Route Map/i), 5, "Route Map section"],
        [/\b(?:route|dispatch)\b/i.test(content), 2, "routing language"],
      ]);

    case "report":
      return scoreFromSignals(subtype, [
        [artifact.name === "goat-security", 5, 'name "goat-security"'],
        [
          hasSection(content, /##\s+Quick Scan Path/i) ||
            hasSection(content, /##\s+Audit Mode/i),
          4,
          "report/audit heading",
        ],
        [
          /\b(?:reporting-only|read-only)\b/i.test(content),
          2,
          "report-only language",
        ],
        [
          /\b(?:finding|OBSERVED|INFERRED)\b/i.test(content),
          2,
          "finding evidence terms",
        ],
      ]);

    case "workflow": {
      const hasStepZero = hasSection(content, /##\s+Step 0/i);
      const hasCheckpoint = /\bCHECKPOINT\b/i.test(content);
      const hasModeSystem =
        /\b(?:Read-Only|File-Write)\b|\bPlan\b.*\bmode\b|\bImplement\b.*\bmode\b/i.test(
          content,
        );
      const hasPhaseHeadings = /^##\s+Phase\s+\d/im.test(content);
      const hasSkillVersion = /goat-flow-skill-version/i.test(content);
      // NOTE(review): with the `m` flag, `^` matches at any line start, so this
      // is not restricted to a frontmatter block at the top of the file.
      const hasFrontmatterName = /^---[\s\S]*?name:\s*.+[\s\S]*?---/m.test(
        content,
      );
      return scoreFromSignals(subtype, [
        [hasStepZero, 3, "Step 0 intake"],
        [hasCheckpoint, 3, "CHECKPOINT gates"],
        [hasModeSystem, 2, "mode system"],
        [hasPhaseHeadings, 2, "phase headings"],
        [hasSkillVersion, 1, "skill version header"],
        [hasFrontmatterName, 1, "skill frontmatter name"],
        [
          /\b(?:Verification|Proof Gate|Testing Gate)\b/i.test(content),
          2,
          "verification language",
        ],
      ]);
    }

    case "playbook": {
      const browserToolRefs = countRegexMatches(
        content,
        /\bbrowser_[A-Za-z0-9_]+\b/g,
      );
      const mcpToolRefs = countRegexMatches(content, /\bmcp__[A-Za-z0-9_]+\b/g);
      const proceduralStepCount = countRegexMatches(
        content,
        /^##\s+Step\s+\d\b/gim,
      );
      return scoreFromSignals(subtype, [
        [hasSection(content, /##\s+Environment/i), 2, "Environment section"],
        [
          hasSection(content, /##\s+Prerequisites/i) ||
            hasSection(content, /##\s+Availability Check/i),
          2,
          "prerequisite or availability section",
        ],
        [
          hasSection(content, /##\s+Common Gotchas/i) ||
            hasSection(content, /Troubleshoot|Fallback/i),
          2,
          "troubleshooting/gotchas",
        ],
        [hasSection(content, /##\s+Quick Reference/i), 2, "Quick Reference"],
        [
          browserToolRefs + mcpToolRefs >= 2 ||
            /Playwright\s+MCP/i.test(content),
          3,
          "repeated browser/MCP tool references",
        ],
        [proceduralStepCount >= 2, 2, "procedural Step N headings"],
        [
          /\btool\b.*\bprotocol\b|\bobservation\b.*\bworkflow\b|\bcapture\b.*\bworkflow\b|\bInteraction Workflow\b/i.test(
            content,
          ),
          2,
          "tool/playbook workflow language",
        ],
      ]);
    }

    case "index":
      return scoreFromSignals(subtype, [
        [artifact.name === "skill-quality-testing", 5, "known index name"],
        [
          /Which file to load|Cross-references/i.test(content),
          3,
          "index routing",
        ],
        [/index/i.test(artifact.name), 2, "index name"],
      ]);

    default:
      // Meta-reference table; also the catch-all for unrecognised subtypes.
      return scoreFromSignals(subtype, [
        [
          artifact.name === "skill-preamble" ||
            artifact.name === "skill-conventions",
          6,
          "known meta-reference name",
        ],
        [/goat-flow-reference-version/i.test(content), 2, "reference version"],
        [
          /loaded by every skill|shared conventions/i.test(content),
          2,
          "meta language",
        ],
      ]);
  }
}

/**
 * Detect which subtype an artifact's content most resembles, independent of
 * any configured detection rules. Every subtype in SHAPE_DETECTION_ORDER is
 * scored; those below MIN_SHAPE_SCORE are discarded and the rest ranked.
 *
 * @param artifact - record supplying `name`, `kind` (for the fallback) and `id` (for messages).
 * @param content - raw artifact text scanned for shape signals.
 * @returns `{ detectedShape, confidence, alternatives, reasoning }`; when no
 *   subtype reaches the threshold, the kind-based fallback with confidence 0.
 */
export function detectArtifactShape(artifact, content) {
  // Array.prototype.sort is stable, so equal scores keep detection order.
  const [top, ...rest] = SHAPE_DETECTION_ORDER.map((subtype) =>
    scoreShapeMatch(artifact, content, subtype),
  )
    .filter((match) => match.score >= MIN_SHAPE_SCORE)
    .sort((a, b) => b.score - a.score);

  if (top === undefined) {
    const fallback = fallbackSubtypeForKind(artifact.kind);
    return {
      detectedShape: fallback,
      confidence: 0,
      alternatives: [],
      reasoning: [
        `no semantic shape matched ${artifact.id} above MIN_SHAPE_SCORE=${MIN_SHAPE_SCORE}; using ${fallback} as fallback`,
      ],
    };
  }

  return {
    detectedShape: top.subtype,
    confidence: subtypeConfidence(top, rest[0]),
    alternatives: rest.map(({ subtype, score }) => ({ subtype, score })),
    reasoning: [
      `detected ${top.subtype} shape (score ${top.score}): ${top.reasoning.join("; ")}`,
      ...rest.map(
        (m) =>
          `also matched ${m.subtype} shape (score ${m.score}): ${m.reasoning.join("; ")}`,
      ),
    ],
  };
}
//# sourceMappingURL=skill-quality-classification.js.map