claude-flow
Version:
Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration
105 lines • 3.57 kB
TypeScript
/**
* GAIA Hardness Predictor — Linear Classifier (ADR-136 Track Q)
*
* Classifies GAIA questions into easy / medium / hard using a
* hand-rolled logistic regression (no external ML dependencies).
*
* Training:
* `predictor.train(labeledData)` — fits weights via gradient descent
* on cross-entropy loss using the 17-dim feature vectors.
*
* Inference:
* `predictor.predict(question)` — returns difficulty class + confidence
* + a ComputeBudget that drives model/turns/voting choices in gaia-bench.
*
* Cold-start:
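* When untrained (weights = null), classifies everything as "medium".
* This is the safe default: every question receives the medium budget, so
* no hard question is under-served by Haiku and no question is routed to
* the cheaper "easy" budget on the strength of an untrained guess.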
*
* Compute budget policy (from ADR-136 Track Q research):
* easy → Haiku, max 4 turns, 1 attempt
* medium → Sonnet, max 8 turns, 1 attempt
* hard → Sonnet, max 12 turns, 3-vote (Track A)
*
* Conservative threshold:
* If in doubt, classify UP (medium→hard preferred over medium→easy).
* `conservativeMode: true` (default) shifts the easy/medium boundary
* so fewer questions fall into "easy".
*
* Refs: ADR-136, ADR-135, #2156
*/
import type { GaiaQuestion } from '../gaia-loader.js';
import { type FeatureVector } from './features.js';
/** Hardness bucket assigned to a GAIA question: 'easy', 'medium' or 'hard'. */
export type DifficultyClass = 'easy' | 'medium' | 'hard';
/**
* Compute allowance attached to a prediction. Per the file header, it
* drives the model / turns / voting choices in gaia-bench.
*/
export interface ComputeBudget {
/** Model tier the question should be run on. */
model: 'haiku' | 'sonnet';
/** Upper bound on agent turns (header policy: 4 / 8 / 12 for easy / medium / hard). */
maxTurns: number;
/** Number of attempts to vote over (header policy: 1 for easy and medium, 3 for hard). */
votingAttempts: number;
}
/** Value returned by `HardnessPredictor.predict` for a single question. */
export interface PredictionResult {
/** Predicted hardness class. */
difficulty: DifficultyClass;
/**
* Confidence in `difficulty`. Documented as 0.5 for cold-start predictions.
* NOTE(review): presumably a probability in [0, 1] — confirm in the implementation.
*/
confidence: number;
/** Compute budget that accompanies the predicted class. */
budget: ComputeBudget;
/**
* Feature vector for the question (the file header describes 17-dim vectors).
* NOTE(review): presumably the vector the prediction was computed from — confirm.
*/
features: FeatureVector;
}
/**
* One observation from a prior run, consumed by `HardnessPredictor.train`
* as weakly-supervised training data.
*/
export interface LabeledExample {
/** The GAIA question that was attempted. */
question: GaiaQuestion;
/** Whether the attempt was correct; `train` documents incorrect attempts as labelled hard. */
wasCorrect: boolean;
/** Number of turns the agent used (optional; used to refine label). */
turns?: number;
}
/**
* Compute budget for each difficulty class.
*
* The concrete values are not visible in this declaration; the file header
* documents the policy as:
* easy → Haiku, max 4 turns, 1 attempt
* medium → Sonnet, max 8 turns, 1 attempt
* hard → Sonnet, max 12 turns, 3-vote
*/
export declare const COMPUTE_BUDGETS: Record<DifficultyClass, ComputeBudget>;
/**
* Linear classifier that assigns GAIA questions to easy / medium / hard and
* returns a matching compute budget with each prediction.
*
* This is an ambient declaration only: no method bodies are visible here, so
* the behavioural notes below restate the documented contract rather than
* the implementation.
*/
export declare class HardnessPredictor {
/**
* Weight matrix: weights[classIdx][featureIdx].
* null = untrained (cold-start: return medium for everything).
*
* The member's type is omitted here because it is private.
*/
private weights;
/** Bias terms per class. */
private biases;
/** Whether conservative mode is active (default: true). */
private readonly conservativeMode;
/**
* @param options Optional settings.
* `conservativeMode` is documented as defaulting to true; when active, the
* easy/medium boundary is shifted so fewer questions are classified "easy".
*/
constructor(options?: {
conservativeMode?: boolean;
});
/**
* Returns true when the predictor has been trained and is ready
* to make non-trivial predictions.
*
* NOTE(review): presumably equivalent to "weights is not null" — confirm
* in the implementation.
*/
get isTrained(): boolean;
/**
* Train the linear classifier using labelled examples from prior runs.
*
* Labelling strategy (weak supervision):
* - Correct + turns ≤ median turns → easy
* - Correct + turns > median turns → medium
* - Incorrect → hard
*
* With < 10 examples, refuses to train (cold-start is safer).
* With 10-50 examples, trains but sets `conservativeMode`-threshold high.
*
* NOTE(review): the declaration does not show whether "refuses to train"
* is a silent no-op or a thrown error — confirm before relying on either.
* NOTE(review): `conservativeMode` itself is a readonly flag, so the
* "threshold" above is presumably a separate internal value — confirm.
*
* @param labeledData Outcomes of prior runs; `turns` is optional per example.
*/
train(labeledData: LabeledExample[]): void;
/**
* Predict the hardness class of a single GAIA question.
*
* Cold-start (untrained): returns medium with confidence=0.5.
*
* @param question The question to classify.
* @returns The predicted class, its confidence, the associated compute
* budget, and the question's feature vector.
*/
predict(question: GaiaQuestion): PredictionResult;
/**
* Export weights as a plain JSON-serialisable object.
* Returns null if untrained.
*
* @returns An object of the same shape `import` accepts, or null.
*/
export(): {
weights: number[][];
biases: number[];
} | null;
/**
* Import previously exported weights.
*
* NOTE(review): no validation is visible from the declaration. The file
* header mentions three classes and 17-dim feature vectors, so `weights`
* is presumably 3×17 and `biases` of length 3 — confirm.
*
* @param state Weights and biases, in the shape produced by `export`.
*/
import(state: {
weights: number[][];
biases: number[];
}): void;
}
//# sourceMappingURL=predictor.d.ts.map