graphlit-client
Version:
Graphlit API Client for TypeScript
92 lines (91 loc) • 3.85 kB
TypeScript
import * as Types from "../generated/graphql-types.js";
/**
* Reports whether accurate token counting is available.
*
* @returns `true` when js-tiktoken is installed and the encoder loaded successfully.
*
* NOTE(review): presumably this is the same condition that selects the BPE path in
* `estimateTokens` (as opposed to its heuristic fallback) — confirm against the implementation.
*/
export declare function isAccurateTokenCounting(): boolean;
/**
* Estimates the number of tokens in a piece of text.
*
* When js-tiktoken is installed, returns an accurate BPE token count (o200k_base encoding).
* Otherwise falls back to a conservative heuristic: chars / 3.5.
*
* @param text - The text to measure.
* @returns The token count: an exact BPE count or a heuristic estimate, as described above.
*
* NOTE(review): how the heuristic result is rounded (and what is returned for an empty
* string) is not visible from this declaration — confirm against the implementation.
*/
export declare function estimateTokens(text: string): number;
/**
* Configuration for context window management during agentic tool loops.
* Values can be provided by the server (via ConversationStrategy) or set client-side.
*
* All three fields are required by this type; the "Default" values quoted on each field
* are not applied by the type itself.
* NOTE(review): presumably those defaults are the values of `DEFAULT_CONTEXT_STRATEGY` — confirm.
*/
export interface ContextStrategyConfig {
/**
* Max tokens for any single tool result. Results exceeding this are truncated.
* Default: 128000
*/
toolResultTokenLimit: number;
/**
* Max tool call/response rounds to keep in context. Older rounds are dropped FIFO.
* Default: 10
*/
toolRoundLimit: number;
/**
* Fraction of token budget at which client-side windowing is triggered.
* Expressed as a fraction (e.g. 0.75), not as a 0-100 percentage.
* Default: 0.75
*/
rebudgetThreshold: number;
}
/**
* Default context strategy configuration.
*
* NOTE(review): the concrete values are not visible in this declaration file; presumably they
* match the per-field defaults documented on `ContextStrategyConfig` (128000 / 10 / 0.75) — confirm.
* The binding is `const`, but the object type has no `readonly` fields, so callers should
* copy it rather than mutate it.
*/
export declare const DEFAULT_CONTEXT_STRATEGY: ContextStrategyConfig;
/**
* Tracks token budget during streaming agent tool loops.
*
* Initialized from server-provided accurate token counts (via formatConversation details),
* then uses character-based heuristic estimation for incremental additions during the loop.
*
* NOTE(review): `estimateTokens` documents an accurate BPE path when js-tiktoken is installed;
* if incremental additions go through `estimateTokens`, "character-based heuristic" only
* describes the fallback — confirm against the implementation.
*/
export declare class TokenBudgetTracker {
/** Token limit supplied at construction; exposed via `maxTokens` as the model's full context limit. */
private readonly tokenLimit;
/** Completion token limit supplied at construction; subtracted from `tokenLimit` when computing `budget`. */
private readonly completionTokenLimit;
/** Running token usage; exposed via `usedTokens`. */
private _usedTokens;
/**
* @param tokenLimit - The model's full context token limit.
* @param completionTokenLimit - Tokens reserved for the completion.
* @param initialUsedTokens - Token usage to start tracking from.
*/
constructor(tokenLimit: number, completionTokenLimit: number, initialUsedTokens: number);
/**
* Create a tracker from formatConversation response details.
* Returns undefined if the details lack token information.
*
* @param details - Response details; every field is optional and nullable, and individual
* entries of `messages` may themselves be null.
* @returns A tracker, or `undefined` when token information is missing.
*
* NOTE(review): which missing fields count as "lacking token information", and whether the
* initial usage is the sum of `messages[].tokens`, is not visible here — confirm.
*/
static fromDetails(details: {
tokenLimit?: number | null;
completionTokenLimit?: number | null;
messages?: Array<{
tokens?: number | null;
} | null> | null;
}): TokenBudgetTracker | undefined;
/** Total available token budget (tokenLimit - completionTokenLimit, at 95% ceiling) */
get budget(): number;
/** Current estimated token usage */
get usedTokens(): number;
/**
* Remaining tokens before budget is exhausted.
* NOTE(review): whether this is clamped at zero once usage exceeds the budget is not visible — confirm.
*/
get remaining(): number;
/**
* Current usage as a percentage (0-100).
* NOTE(review): presumably relative to `budget` rather than `maxTokens` — confirm.
*/
get usagePercent(): number;
/** Model's full context token limit */
get maxTokens(): number;
/**
* Track addition of new message content.
*
* @param text - The message content being added.
* @param serverTokenCount - Optional token count reported by the server.
* NOTE(review): presumably used in place of an estimate of `text` when provided — confirm.
*/
addMessage(text: string, serverTokenCount?: number): void;
/**
* Check if we need to trigger windowing/re-budgeting.
*
* @param threshold - Usage level at which re-budgeting is required.
* NOTE(review): presumably a fraction of the budget like
* `ContextStrategyConfig.rebudgetThreshold` (e.g. 0.75), not a 0-100 percentage like
* `usagePercent` — confirm the units before passing a value.
* @returns `true` when windowing/re-budgeting should be triggered.
*/
needsRebudget(threshold: number): boolean;
/**
* Reset tracker from a fresh set of messages (after windowing).
*
* @param messages - The messages now in context; each may carry its text and/or a token count.
* NOTE(review): presumably `tokens` is preferred when present and `message` is estimated
* otherwise — confirm.
*/
resetFromMessages(messages: Array<{
message?: string | null;
tokens?: number | null;
}>): void;
/**
* Get current usage snapshot for emitting events.
*
* @returns A plain object with `usedTokens`, `maxTokens`, `percentage` and `remainingTokens`.
* NOTE(review): presumably these mirror the `usedTokens`, `maxTokens`, `usagePercent` and
* `remaining` accessors respectively — confirm.
*/
getUsageSnapshot(): {
usedTokens: number;
maxTokens: number;
percentage: number;
remainingTokens: number;
};
}
/**
* Truncates a tool result to fit within a token budget.
*
* Attempts to find a clean break point (JSON boundary or newline).
* Appends a [truncated] marker so the LLM knows data was cut.
*
* @param result - The tool result; typed `unknown`, so any value is accepted.
* NOTE(review): how non-string values are converted to text is not visible here — confirm.
* @param maxTokens - Token budget the returned string should fit within.
* @param toolName - Name of the tool that produced the result.
* NOTE(review): presumably used for logging or in the truncation marker — confirm.
* @returns The result as a string, truncated when it exceeds the budget.
*/
export declare function truncateToolResult(result: unknown, maxTokens: number, toolName: string): string;
/**
* Windows tool rounds to keep the messages array within budget.
*
* Preserves:
* - "Header" messages (system prompt, conversation history, initial user message)
* - The most recent `keepRounds` tool rounds
*
* Drops older tool rounds and inserts a system message noting what was removed.
*
* @param messages - The conversation messages to window.
* NOTE(review): whether the input array is mutated or copied is not visible here — confirm
* before reusing `messages` after the call.
* @param keepRounds - Number of most recent tool rounds to retain.
* NOTE(review): behavior for zero or negative values is not visible here — confirm.
* @returns The windowed messages array
*/
export declare function windowToolRounds(messages: Types.ConversationMessage[], keepRounds: number): Types.ConversationMessage[];