@blundergoat/goat-flow
Version:
AI coding agent harness and local dashboard for Claude Code, OpenAI Codex, Google Antigravity, and GitHub Copilot - setup audits, guardrails, structured skills, deny hooks, and persistent learning loops.
141 lines • 5.07 kB
TypeScript
/**
* Shared type vocabulary for the skill-quality scoring pipeline: the artifact inventory record,
* classification and shape-detection results, the per-metric result rows, and the report schema
* that CLI JSON, dashboard routes, and prompt composition all consume.
*
* This module is the contract boundary between the scorers (which produce MetricResult rows) and
* every reader of a SkillQualityReport, so changing a field here ripples to those consumers - keep
* the public shapes stable. It also hosts the small `finalizeMetric` helper that every metric
* scorer routes through to apply subtype-specific max-score capping consistently.
*/
import type { ArtifactKind, ArtifactSource, ArtifactSubtype, MetricName, QualityConfig } from "./quality-config.js";
/**
 * What the rubric suggests doing with an artifact.
 *
 * The values run from `keep-skill` (the artifact is healthy), through the revision and
 * reclassification hints, down to `retire`. `needs-human-review` is reserved for the case where
 * the scores are strong but classification confidence is too low to act on automatically.
 */
export type Recommendation =
  | "keep-skill"
  | "consider-revision"
  | "consider-reclassifying"
  | "reference-playbook"
  | "retire"
  | "needs-human-review";
/**
 * Severity band assigned to a single metric row, derived from its score-to-max ratio.
 *
 * `n/a` marks a metric that does not apply to the artifact's subtype (its max score is 0). That is
 * different from a metric that applied and scored zero, and dashboards must keep the two apart.
 */
export type MetricSeverity =
  | "ok"
  | "warn"
  | "fail"
  | "n/a";
/**
 * A runner-up subtype candidate, shown to reviewers when the classification is ambiguous.
 */
interface ClassificationAlternative {
  /** The lower-ranked subtype that also matched. */
  subtype: ArtifactSubtype;
  /** Match score for this candidate. NOTE(review): the scale is not defined in this file - confirm with the classifier. */
  score: number;
}
/**
 * The scoring profile that was applied to an artifact, together with the evidence for why that
 * subtype won over the others.
 */
export interface ClassificationResult {
  /** The winning subtype. */
  detectedSubtype: ArtifactSubtype;
  /** Value in the 0-1 range expressing how strongly the top subtype dominates the alternatives. */
  confidence: number;
  /** Lower-ranked subtype matches, kept for reviewers when the result is ambiguous. */
  alternatives: ClassificationAlternative[];
  /** Explanatory entries describing why the subtype was chosen. */
  reasoning: string[];
}
/**
 * The semantic shape of an artifact's content, detected independently from the scoring profile so
 * that misfiled artifacts can be caught.
 */
export interface ShapeDetectionResult {
  /** The shape the content most resembles, expressed as a subtype. */
  detectedShape: ArtifactSubtype;
  /** Confidence in the detected shape. NOTE(review): presumably 0-1 like ClassificationResult.confidence - confirm. */
  confidence: number;
  /** Lower-ranked shape candidates. */
  alternatives: ClassificationAlternative[];
  /** Explanatory entries describing why the shape was chosen. */
  reasoning: string[];
}
/**
 * One inventory record as surfaced by the CLI and the dashboard. Paths are kept
 * project-relative.
 */
export interface ArtifactEntry {
  /** Identifier of the artifact. NOTE(review): uniqueness scope is not stated here - confirm. */
  id: string;
  /** Name of the artifact. */
  name: string;
  /** Project-relative path of the artifact. */
  path: string;
  /** Artifact kind, as defined by the quality config. */
  kind: ArtifactKind;
  /** Artifact source, as defined by the quality config. */
  source: ArtifactSource;
  /** Optional list of mirror paths. NOTE(review): presumably copies of this artifact kept elsewhere in the project - confirm. */
  mirrorPaths?: string[];
  /** Optional list of mirrors that are missing. NOTE(review): presumably expected mirror locations that were not found - confirm. */
  missingMirrors?: string[];
}
/**
 * Recommendation hints that fit metrics can attach to a result. They inform the recommendation
 * without changing the numeric score. Every flag is optional.
 */
export interface MetricSignals {
  /** Hint that the artifact should be promoted. NOTE(review): promotion target is not defined in this file. */
  shouldPromote?: boolean;
  /** Hint that the artifact should be demoted. NOTE(review): demotion target is not defined in this file. */
  shouldDemote?: boolean;
  /** Hint that the artifact is a meta reference. */
  isMetaReference?: boolean;
}
/**
 * A single rubric row, as it looks once subtype-specific max-score capping has been applied.
 */
export interface MetricResult {
  /** Which metric this row reports. */
  metric: MetricName;
  /** Label for the metric. */
  label: string;
  /** Score awarded for the metric. */
  score: number;
  /** Maximum score for the metric; 0 when the metric does not apply to the subtype (see MetricSeverity `n/a`). */
  maxScore: number;
  /** Severity band for this row. */
  severity: MetricSeverity;
  /** Explanatory text for the score. */
  detail: string;
  /** Optional recommendation hints; the property may be omitted or explicitly `undefined`. */
  signals?: MetricSignals | undefined;
}
/**
 * The public report schema. CLI JSON output, dashboard routes, and prompts all read this shape,
 * so it must stay stable.
 */
export interface SkillQualityReport {
  /** The inventory record the report is about. */
  artifact: ArtifactEntry;
  /** Total score for the artifact. NOTE(review): presumably the sum over `metrics` - confirm. */
  totalScore: number;
  /** Maximum achievable total. NOTE(review): relationship to `profileMax` is not stated here - confirm. */
  maxTotalScore: number;
  /** Maximum associated with the applied scoring profile. */
  profileMax: number;
  /** The scoring profile that was applied. Keep stable for existing consumers. */
  subtype: ArtifactSubtype;
  /** Semantic content shape, detected independently from the scoring profile. */
  detectedShape: ArtifactSubtype;
  /** Confidence in `detectedShape`. */
  shapeConfidence: number;
  /** Shape-mismatch flag. NOTE(review): presumably true when `detectedShape` differs from `subtype` - confirm. */
  shapeMismatch: boolean;
  /** Full classification result, including alternatives and reasoning. */
  classification: ClassificationResult;
  /** The disposition the rubric recommends. */
  recommendation: Recommendation;
  /** Per-metric result rows. */
  metrics: MetricResult[];
  /** Provenance of the composed scoring surface. */
  composedFrom: string[];
  /** Notes about fit. NOTE(review): producer of these notes is not visible in this file. */
  fitNotes: string[];
}
/**
 * The input every scorer receives. It carries both the raw artifact text and the composed
 * context.
 */
export interface MetricInput {
  /** The artifact's raw text. */
  rawContent: string;
  /** The composed context. */
  composedContent: string;
  /** Inventory record of the artifact being scored. */
  artifact: ArtifactEntry;
  /** Subtype the artifact is scored as. */
  subtype: ArtifactSubtype;
  /** Maximum associated with the scoring profile. */
  profileMax: number;
  /** Root of the project. NOTE(review): whether this is absolute or relative is not stated here - confirm. */
  projectRoot: string;
  /** Quality configuration in effect for this scoring run. */
  config: QualityConfig;
}
/**
 * Outcome of reading content. Truncation notes travel separately from the content itself so that
 * scoring stays deterministic.
 */
export interface ReadContentResult {
  /** The content that was read. */
  content: string;
  /** Notes about the read, such as truncation, kept out of `content`. */
  notes: string[];
}
/**
 * The composed scoring surface, together with the provenance that reports expose as
 * `composedFrom`.
 */
export interface ComposeResult {
  /** The raw text. */
  raw: string;
  /** The composed text. */
  composed: string;
  /** Provenance entries for the composed text. */
  sources: string[];
  /** Notes produced during composition. */
  notes: string[];
}
/**
 * Optional switches for composition.
 */
export interface ComposeOptions {
  /** NOTE(review): presumably controls whether composition scans the disk; the default when omitted is not visible here - confirm. */
  scanDisk?: boolean;
}
/**
 * The signature every rubric metric implements: a pure function that maps the shared scorer input
 * to one capped result row.
 *
 * Scorers must be deterministic and free of side effects (no disk reads), so that the same content
 * always produces the same score. All I/O happens before scoring and arrives via MetricInput.
 */
export type MetricScorer = (input: MetricInput) => MetricResult;
/**
 * Builds the final result row for a metric. Every metric scorer routes through this helper so
 * that subtype-specific max-score capping is applied consistently.
 *
 * @param input - The shared scorer input for the artifact being scored.
 * @param metric - The metric the row is for.
 * @param score - The score for the metric. NOTE(review): presumably the uncapped value - confirm against the implementation.
 * @param detail - Explanatory text for the row.
 * @param signals - Optional recommendation hints to attach to the row.
 * @returns The metric result row.
 */
export declare function finalizeMetric(
  input: MetricInput,
  metric: MetricName,
  score: number,
  detail: string,
  signals?: MetricSignals,
): MetricResult;
export {};
//# sourceMappingURL=skill-quality-types.d.ts.map