UNPKG

graphlit-client

Version:
92 lines (91 loc) 3.85 kB
import * as Types from "../generated/graphql-types.js";

/**
 * Reports whether exact token counting is active.
 *
 * @returns `true` when js-tiktoken is installed and its encoder loaded successfully.
 */
export declare function isAccurateTokenCounting(): boolean;

/**
 * Estimates how many tokens a piece of text occupies.
 *
 * With js-tiktoken installed, the result is an accurate BPE token count
 * (o200k_base encoding). Without it, a conservative heuristic of
 * chars / 3.5 is used instead.
 *
 * @param text - Text to measure.
 * @returns The token count, exact or heuristic as described above.
 */
export declare function estimateTokens(text: string): number;

/**
 * Settings that govern context window management during agentic tool loops.
 *
 * Values may come from the server (via ConversationStrategy) or be set client-side.
 */
export interface ContextStrategyConfig {
  /**
   * Token ceiling for any single tool result; results above it are truncated.
   * Default: 128000
   */
  toolResultTokenLimit: number;

  /**
   * Maximum number of tool call/response rounds kept in context; older rounds
   * are dropped FIFO.
   * Default: 10
   */
  toolRoundLimit: number;

  /**
   * Fraction of the token budget at which client-side windowing is triggered.
   * Default: 0.75
   */
  rebudgetThreshold: number;
}

/**
 * Default context strategy.
 *
 * NOTE(review): the actual values are not visible in this declaration file;
 * presumably they match the per-field defaults documented on
 * `ContextStrategyConfig` — confirm against the implementation.
 */
export declare const DEFAULT_CONTEXT_STRATEGY: ContextStrategyConfig;

/**
 * Keeps track of the token budget while a streaming agent runs its tool loop.
 *
 * The tracker is seeded with accurate, server-provided token counts (via
 * formatConversation details); additions made during the loop are then
 * accounted for with character-based heuristic estimation.
 */
export declare class TokenBudgetTracker {
  private readonly tokenLimit;
  private readonly completionTokenLimit;
  private _usedTokens;

  /**
   * @param tokenLimit - Token limit for the model's context.
   * @param completionTokenLimit - Tokens set aside for the completion.
   * @param initialUsedTokens - Tokens already consumed when tracking starts.
   */
  constructor(
    tokenLimit: number,
    completionTokenLimit: number,
    initialUsedTokens: number
  );

  /**
   * Builds a tracker from the details of a formatConversation response.
   *
   * @param details - Response details carrying the limits and per-message token counts.
   * @returns A tracker, or `undefined` when the details lack token information.
   */
  static fromDetails(details: {
    tokenLimit?: number | null;
    completionTokenLimit?: number | null;
    messages?: ({ tokens?: number | null } | null)[] | null;
  }): TokenBudgetTracker | undefined;

  /** Total available token budget (tokenLimit - completionTokenLimit, at 95% ceiling). */
  get budget(): number;

  /** Current estimated token usage. */
  get usedTokens(): number;

  /** Tokens left before the budget is exhausted. */
  get remaining(): number;

  /** Current usage expressed as a percentage (0-100). */
  get usagePercent(): number;

  /** The model's full context token limit. */
  get maxTokens(): number;

  /**
   * Records the addition of new message content.
   *
   * @param text - Content of the added message.
   * @param serverTokenCount - Optional token count for the message.
   *   NOTE(review): presumably a server-reported count that takes precedence
   *   over the local estimate — confirm against the implementation.
   */
  addMessage(text: string, serverTokenCount?: number): void;

  /**
   * Checks whether windowing/re-budgeting needs to be triggered.
   *
   * @param threshold - Trigger level.
   *   NOTE(review): presumably the same fraction-of-budget unit as
   *   `ContextStrategyConfig.rebudgetThreshold` (e.g. 0.75), not a 0-100
   *   percentage — confirm against the implementation.
   */
  needsRebudget(threshold: number): boolean;

  /**
   * Resets the tracker from a fresh set of messages (after windowing).
   *
   * @param messages - The messages that remain in context.
   */
  resetFromMessages(
    messages: { message?: string | null; tokens?: number | null }[]
  ): void;

  /**
   * Produces a snapshot of the current usage, suitable for emitting events.
   *
   * @returns The used, maximum and remaining token figures plus the usage percentage.
   */
  getUsageSnapshot(): {
    usedTokens: number;
    maxTokens: number;
    percentage: number;
    remainingTokens: number;
  };
}

/**
 * Cuts a tool result down so it fits within a token budget.
 *
 * Tries to break at a clean point (a JSON boundary or a newline) and appends
 * a [truncated] marker so the LLM knows data was cut.
 *
 * @param result - The raw tool result.
 * @param maxTokens - Token budget the returned string should fit within.
 * @param toolName - Name of the tool that produced the result.
 * @returns The result as a string, truncated as described above.
 */
export declare function truncateToolResult(
  result: unknown,
  maxTokens: number,
  toolName: string
): string;

/**
 * Windows tool rounds so the messages array stays within budget.
 *
 * Kept:
 * - "Header" messages (system prompt, conversation history, initial user message)
 * - The most recent `keepRounds` tool rounds
 *
 * Older tool rounds are dropped, and a system message is inserted noting
 * what was removed.
 *
 * @param messages - The conversation messages to window.
 * @param keepRounds - Number of most recent tool rounds to preserve.
 * @returns The windowed messages array
 */
export declare function windowToolRounds(
  messages: Array<Types.ConversationMessage>,
  keepRounds: number
): Array<Types.ConversationMessage>;