graphlit-client
Version:
Graphlit API Client for TypeScript
201 lines (200 loc) • 9.64 kB
JavaScript
const MAX_MULTIPLIER = 3.0;
/**
* Manages budget enforcement, wind-down protocol, instruction building,
* and adaptive budget adjustment for the multi-turn agent harness.
*/
export class TurnEvaluator {
maxTurns;
maxWallClockMs;
maxToolCalls;
windDownTurns;
initialPrompt;
extensionGranted = false;
constructor(config) {
this.maxTurns = config.maxTurns;
this.maxWallClockMs = config.maxWallClockMs;
this.maxToolCalls = config.maxToolCalls;
this.windDownTurns = config.windDownTurns;
this.initialPrompt = config.initialPrompt;
}
// ── Adaptive budget ──────────────────────────────────────────────────────
/**
* Uses an LLM to classify the complexity of the initial prompt and returns
* a budget multiplier (1.0–3.0). Falls back to 1.0 on any error.
*/
async classifyComplexity(prompt, extractFn) {
try {
const classificationTool = {
name: "classify_complexity",
description: "Classify the complexity of a task to determine resource allocation.",
schema: JSON.stringify({
type: "object",
properties: {
multiplier: {
type: "number",
description: "Budget multiplier from 1.0 (simple) to 3.0 (highly complex). " +
"1.0 = simple single-item task, 1.5 = moderate multi-step task, " +
"2.0 = complex multi-item research, 3.0 = comprehensive deep analysis.",
minimum: 1.0,
maximum: 3.0,
},
reason: {
type: "string",
description: "Brief explanation of the complexity assessment.",
},
},
required: ["multiplier", "reason"],
}),
};
const classificationPrompt = "Evaluate the following task and classify its complexity. Consider:\n" +
"- Task scope: single-item vs multi-item vs comprehensive\n" +
"- Research depth required: surface summary vs deep analysis\n" +
"- Number of distinct sub-tasks\n" +
"- Expected tool call volume\n\n" +
"Call the classify_complexity tool with your assessment.";
const results = await extractFn(classificationPrompt, prompt, [
classificationTool,
]);
if (results && results.length > 0) {
const result = results[0];
if (result?.value) {
const parsed = JSON.parse(result.value);
const clamped = Math.min(MAX_MULTIPLIER, Math.max(1.0, parsed.multiplier));
return { multiplier: clamped, reason: parsed.reason };
}
}
}
catch {
// Classification failure is non-fatal — fall back to 1x
}
return { multiplier: 1.0, reason: "Default (classification unavailable)" };
}
/** Apply a multiplier to all budget limits. */
adjustBudget(multiplier) {
this.maxTurns = Math.ceil(this.maxTurns * multiplier);
this.maxWallClockMs = Math.ceil(this.maxWallClockMs * multiplier);
this.maxToolCalls = Math.ceil(this.maxToolCalls * multiplier);
}
/** Current adjusted maxTurns. */
get adjustedMaxTurns() {
return this.maxTurns;
}
/** Current adjusted maxWallClockMs. */
get adjustedMaxWallClockMs() {
return this.maxWallClockMs;
}
/** Current adjusted maxToolCalls. */
get adjustedMaxToolCalls() {
return this.maxToolCalls;
}
// ── Wind-down ────────────────────────────────────────────────────────────
/** Returns true when the agent should begin winding down. */
shouldWindDown(turn, elapsedMs, totalToolCalls, contextWindow) {
// Within windDownTurns of maxTurns
if (turn >= this.maxTurns - this.windDownTurns)
return true;
// Elapsed > 80% of max wall clock
if (elapsedMs > this.maxWallClockMs * 0.8)
return true;
// Context window > 90%
if (contextWindow && contextWindow.percentage > 90)
return true;
// Tool calls > 90% of max
if (totalToolCalls > this.maxToolCalls * 0.9)
return true;
return false;
}
// ── Extension ────────────────────────────────────────────────────────────
/**
* One-time budget extension if the agent is making steady progress.
* Grants windDownTurns * 2 additional turns.
*/
shouldGrantExtension(turnResults, contextPercent) {
if (this.extensionGranted) {
return { grant: false, extraTurns: 0 };
}
// Must have at least 3 turns of history to evaluate progress
if (turnResults.length < 3) {
return { grant: false, extraTurns: 0 };
}
// Context must be under 60%
if (contextPercent !== undefined && contextPercent >= 60) {
return { grant: false, extraTurns: 0 };
}
// Recent turns must have successful tool calls (no all-error turns)
const recentTurns = turnResults.slice(-3);
const hasErrors = recentTurns.some((t) => t.toolCallCount > 0 &&
(t.errors?.length ?? 0) >= t.toolCallCount);
if (hasErrors) {
return { grant: false, extraTurns: 0 };
}
// Must be actively using tools
const hasActivity = recentTurns.some((t) => t.toolCallCount > 0);
if (!hasActivity) {
return { grant: false, extraTurns: 0 };
}
const extraTurns = this.windDownTurns * 2;
this.extensionGranted = true;
this.maxTurns += extraTurns;
return { grant: true, extraTurns };
}
// ── Instruction building ─────────────────────────────────────────────────
/**
* Build the `instructions` parameter for a turn. Returns undefined for
* normal continuation turns (enables bare fast-path).
*/
buildTurnInstructions(config) {
const parts = [];
// Stuck intervention (highest priority)
if (config.isStuckIntervention) {
const patternDescriptions = {
repeating_tool_calls: "calling the same tools repeatedly with similar arguments",
repeating_responses: "generating very similar responses across multiple turns",
error_loop: "encountering errors on all tool calls for multiple consecutive turns",
empty_turns: "producing empty turns with no tool calls or progress",
};
const description = patternDescriptions[config.isStuckIntervention] ??
config.isStuckIntervention;
parts.push(`You appear to be ${description}. ` +
"Take a different approach: try alternative tools, different arguments, " +
"or reconsider your strategy. If the task cannot be completed with " +
"available tools, call task_complete with a summary of what was accomplished " +
"and what remains.");
}
// Wind-down
if (config.isWindingDown) {
parts.push(`You are running low on remaining capacity (${config.turnsRemaining} turn(s) remaining). ` +
"Wrap up your work: synthesize your findings, provide your final answer, " +
"and call task_complete. Do not start new lines of investigation.");
}
// Context pressure
if (config.contextWindowPercent !== undefined &&
config.contextWindowPercent > 85) {
parts.push(`Context window is at ${config.contextWindowPercent}% capacity. ` +
"Be concise in your responses and tool usage to avoid running out of context.");
}
// Summarization trigger: >70% context AND early in run (< 50% turns used)
if (config.needsSummarization) {
parts.push("Before continuing, summarize your progress so far in a few sentences. " +
"This will help manage context window usage for the remaining work.");
}
// Task restatement every 5th turn
if (config.originalTaskSummary) {
parts.push(`Reminder: your original task is: ${config.originalTaskSummary}`);
}
if (parts.length === 0)
return undefined;
return parts.join("\n\n");
}
// ── Hard budget checks ───────────────────────────────────────────────────
/** Returns true if any hard budget limit has been exhausted. */
isBudgetExhausted(turn, elapsedMs, totalToolCalls) {
if (turn >= this.maxTurns)
return true;
if (elapsedMs >= this.maxWallClockMs)
return true;
if (totalToolCalls >= this.maxToolCalls)
return true;
return false;
}
}