graphlit-client
Version:
Graphlit API Client for TypeScript
224 lines (223 loc) • 8.64 kB
JavaScript
import { createRequire } from "node:module";
import * as Types from "../generated/graphql-types.js";
// ── Shared tiktoken encoder (loaded once at module evaluation, best effort) ──
// Remains `undefined` whenever anything in the load sequence below throws.
let encoder;
try {
    // createRequire provides a CommonJS-style loader inside this ES module.
    const loadModule = createRequire(import.meta.url);
    const { Tiktoken: TiktokenEncoder } = loadModule("js-tiktoken/lite");
    const o200kRanks = loadModule("js-tiktoken/ranks/o200k_base");
    encoder = new TiktokenEncoder(o200kRanks);
    if (process.env.DEBUG_GRAPHLIT_SDK_INITIALIZATION) {
        console.debug("[graphlit-sdk] tiktoken encoder loaded (o200k_base) — accurate token counting enabled");
    }
}
catch {
    // Loading failed (typically js-tiktoken is not installed): callers fall
    // back to the heuristic estimate, so the failure is only reported in debug mode.
    if (process.env.DEBUG_GRAPHLIT_SDK_INITIALIZATION) {
        console.debug("[graphlit-sdk] js-tiktoken not available — using heuristic token estimation (chars / 3.5)");
    }
}
/**
 * Reports whether the module-level tiktoken encoder was loaded.
 *
 * @returns {boolean} `true` when an encoder is available (exact BPE counts),
 *   `false` when token counts come from the character heuristic.
 */
export function isAccurateTokenCounting() {
    const encoderLoaded = typeof encoder !== "undefined";
    return encoderLoaded;
}
/**
 * Token estimation.
 *
 * When the tiktoken encoder is loaded, returns the BPE token count (o200k_base encoding).
 * Otherwise — or when the encoder throws for this particular text — falls back to a
 * conservative heuristic: chars / 3.5, rounded up.
 *
 * @param {string | null | undefined} text - Text to measure.
 * @returns {number} Token count; 0 for empty or missing text.
 */
export function estimateTokens(text) {
    if (!text)
        return 0;
    if (encoder) {
        try {
            return encoder.encode(text).length;
        }
        catch {
            // The encoder can reject some inputs (e.g. js-tiktoken throws on
            // disallowed special tokens such as "<|endoftext|>" by default).
            // An estimate must never crash the caller, so use the heuristic instead.
        }
    }
    return Math.ceil(text.length / 3.5);
}
/**
 * Default context-management settings.
 *
 * NOTE(review): none of these fields are read in the code visible here, so the
 * per-field notes below are inferred from names and sibling helpers — confirm
 * against the consumer before relying on them.
 */
export const DEFAULT_CONTEXT_STRATEGY = {
// Presumably the token cap applied to a single tool result (cf. the `maxTokens` argument of truncateToolResult) — TODO confirm.
toolResultTokenLimit: 128_000,
// Presumably a limit on tool-calling rounds (cf. the `keepRounds` argument of windowToolRounds) — TODO confirm.
toolRoundLimit: 10,
// Presumably the fraction passed to TokenBudgetTracker.needsRebudget, which compares usagePercent against threshold * 100 — TODO confirm.
rebudgetThreshold: 0.75,
};
/**
 * Tracks token budget during streaming agent tool loops.
 *
 * Initialized from server-provided token counts (via formatConversation details).
 * Additions that arrive without a server-side count are measured with
 * `estimateTokens`.
 */
export class TokenBudgetTracker {
    tokenLimit;
    completionTokenLimit;
    _usedTokens;
    /**
     * @param {number} tokenLimit - Model's full context token limit.
     * @param {number} completionTokenLimit - Tokens reserved for the completion.
     * @param {number} initialUsedTokens - Tokens already consumed by existing messages.
     */
    constructor(tokenLimit, completionTokenLimit, initialUsedTokens) {
        this.tokenLimit = tokenLimit;
        this.completionTokenLimit = completionTokenLimit;
        this._usedTokens = initialUsedTokens;
    }
    /**
     * Create a tracker from formatConversation response details.
     *
     * @param {object | null | undefined} details - Response details; may be missing.
     * @returns {TokenBudgetTracker | undefined} `undefined` if the details are
     *   missing or lack a (non-zero) `tokenLimit`.
     */
    static fromDetails(details) {
        // Optional chaining: absent details are "no token information", not an error.
        const tokenLimit = details?.tokenLimit;
        if (!tokenLimit)
            return undefined;
        const completionTokenLimit = details.completionTokenLimit ?? 4096;
        // Null entries and entries without a token count contribute 0.
        const usedTokens = details.messages?.reduce((sum, msg) => sum + (msg?.tokens ?? 0), 0) ?? 0;
        return new TokenBudgetTracker(tokenLimit, completionTokenLimit, usedTokens);
    }
    /** Total available token budget (tokenLimit - completionTokenLimit, at 95% ceiling) */
    get budget() {
        return Math.floor((this.tokenLimit - this.completionTokenLimit) * 0.95);
    }
    /** Current estimated token usage */
    get usedTokens() {
        return this._usedTokens;
    }
    /** Remaining tokens before budget is exhausted (never negative) */
    get remaining() {
        return Math.max(0, this.budget - this._usedTokens);
    }
    /**
     * Current usage as a rounded percentage of the budget.
     * Reports 100 when the budget is zero or negative; may exceed 100 when over budget.
     */
    get usagePercent() {
        if (this.budget <= 0)
            return 100;
        return Math.round((this._usedTokens / this.budget) * 100);
    }
    /** Model's full context token limit */
    get maxTokens() {
        return this.tokenLimit;
    }
    /**
     * Track addition of new message content.
     *
     * @param {string} text - Message text; only measured when no server count is given.
     * @param {number} [serverTokenCount] - Authoritative count, used as-is when not null/undefined.
     */
    addMessage(text, serverTokenCount) {
        this._usedTokens += serverTokenCount ?? estimateTokens(text);
    }
    /**
     * Check if we need to trigger windowing/re-budgeting.
     *
     * @param {number} threshold - Fraction of the budget (e.g. 0.75), compared against usagePercent / 100.
     * @returns {boolean}
     */
    needsRebudget(threshold) {
        return this.usagePercent >= threshold * 100;
    }
    /**
     * Reset tracker from a fresh set of messages (after windowing).
     *
     * Uses each message's `tokens` when it is non-zero, otherwise estimates from
     * its `message` text. Null entries (and a missing list) count as 0, matching
     * the null tolerance of `fromDetails`.
     *
     * @param {Array<object | null | undefined> | null | undefined} messages
     */
    resetFromMessages(messages) {
        this._usedTokens = (messages ?? []).reduce((sum, msg) => {
            if (msg?.tokens)
                return sum + msg.tokens;
            return sum + estimateTokens(msg?.message ?? "");
        }, 0);
    }
    /** Get current usage snapshot for emitting events */
    getUsageSnapshot() {
        return {
            usedTokens: this._usedTokens,
            maxTokens: this.tokenLimit,
            percentage: this.usagePercent,
            remainingTokens: this.remaining,
        };
    }
}
/**
 * Truncates a tool result to approximately fit within a token budget.
 *
 * Non-string results are serialized with JSON.stringify. Text whose estimated
 * token count is already within `maxTokens` is returned unchanged. Otherwise the
 * text is cut to a character budget derived from `maxTokens`, pulled back to a
 * clean break point (JSON boundary or newline) when one lies in the second half
 * of the cut, and suffixed with a [truncated] marker so the LLM knows data was cut.
 *
 * @param {unknown} result - Tool output (string or JSON-serializable value).
 * @param {number} maxTokens - Token budget for the retained text (marker not included).
 * @param {string} toolName - Name reported in the truncation marker.
 * @returns {string} The original text, the truncated text plus marker, or "" when
 *   there is nothing to serialize.
 */
export function truncateToolResult(result, maxTokens, toolName) {
    const text = typeof result === "string" ? result : JSON.stringify(result);
    if (!text)
        return "";
    const estimatedTokens = estimateTokens(text);
    if (estimatedTokens <= maxTokens)
        return text;
    // When tiktoken is available, compute the actual chars-per-token ratio for
    // this specific text instead of using the hardcoded 3.5 heuristic.
    const charsPerToken = encoder && estimatedTokens > 0
        ? text.length / estimatedTokens
        : 3.5;
    const maxChars = Math.floor(maxTokens * charsPerToken);
    let truncated = text.substring(0, maxChars);
    // substring() counts UTF-16 code units, so the cut can land between the two
    // halves of a surrogate pair (e.g. an emoji). Drop a dangling high surrogate
    // rather than emit an invalid lone code unit.
    const lastUnit = truncated.charCodeAt(truncated.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        truncated = truncated.slice(0, -1);
    }
    // Try to find a clean break point, but only if it keeps more than half of
    // the character budget.
    if (text.startsWith("{") || text.startsWith("[")) {
        // For JSON, end just after the last closing brace/bracket that is
        // followed by a comma or newline.
        const lastComplete = Math.max(truncated.lastIndexOf("},"), truncated.lastIndexOf("}\n"), truncated.lastIndexOf("],"), truncated.lastIndexOf("]\n"));
        if (lastComplete > maxChars * 0.5) {
            truncated = truncated.substring(0, lastComplete + 1);
        }
    }
    else {
        // For plain text, break at the last newline.
        const lastNewline = truncated.lastIndexOf("\n");
        if (lastNewline > maxChars * 0.5) {
            truncated = truncated.substring(0, lastNewline);
        }
    }
    const truncatedTokens = estimateTokens(truncated);
    return `${truncated}\n\n[truncated by ${toolName}: original ~${estimatedTokens} tokens, showing first ~${truncatedTokens} tokens]`;
}
/**
 * Locates where the "header" portion of a conversation ends and the tool rounds begin.
 *
 * The header is everything before the first Assistant-role message that carries
 * a non-empty `toolCalls` list; tool rounds start at that message.
 *
 * @param {Array<object>} messages - Conversation messages in order.
 * @returns {number} Index of the first tool-calling assistant message, or
 *   `messages.length` when there is none.
 */
function findToolRoundStart(messages) {
    const firstToolCallIndex = messages.findIndex((candidate) => candidate.role === Types.ConversationRoleTypes.Assistant &&
        candidate.toolCalls &&
        candidate.toolCalls.length > 0);
    // findIndex reports "not found" as -1; callers expect the array length instead.
    return firstToolCallIndex === -1 ? messages.length : firstToolCallIndex;
}
/**
 * Splits tool-round messages into consecutive rounds.
 *
 * Every Assistant-role message opens a new round and the messages that follow
 * it (until the next Assistant message) belong to that round. Messages that
 * precede the first Assistant message form a round of their own.
 *
 * @param {Array<object>} toolMessages - Messages from the tool-round portion, in order.
 * @returns {Array<Array<object>>} Rounds in original order; empty for empty input.
 */
function groupToolRounds(toolMessages) {
    const grouped = [];
    for (const entry of toolMessages) {
        const opensNewRound = entry.role === Types.ConversationRoleTypes.Assistant;
        const latestRound = grouped[grouped.length - 1];
        if (latestRound === undefined || opensNewRound) {
            grouped.push([entry]);
        }
        else {
            latestRound.push(entry);
        }
    }
    return grouped;
}
/**
 * Windows tool rounds to keep the messages array within budget.
 *
 * Preserves:
 * - "Header" messages (everything before the first tool-calling assistant message)
 * - The most recent `keepRounds` tool rounds
 *
 * Drops older tool rounds and inserts a system message noting what was removed.
 *
 * @param {Array<object>} messages - Full conversation, header first, then tool rounds.
 * @param {number} keepRounds - Number of most recent rounds to keep. Fractions are
 *   rounded down and negative values are treated as 0 (drop every round). A value
 *   that is not a number leaves the messages untouched.
 * @returns The original array when nothing needs dropping, otherwise a new
 *   windowed array (header, summary message, kept rounds).
 */
export function windowToolRounds(messages, keepRounds) {
    const headerEnd = findToolRoundStart(messages);
    const header = messages.slice(0, headerEnd);
    const toolMessages = messages.slice(headerEnd);
    const rounds = groupToolRounds(toolMessages);
    const keepCount = Math.max(0, Math.floor(keepRounds));
    if (Number.isNaN(keepCount) || rounds.length <= keepCount)
        return messages;
    // Slice from an explicit start index: `slice(-keepCount)` would keep EVERY
    // round when keepCount is 0, because -0 is treated as index 0.
    const keptRounds = rounds.slice(rounds.length - keepCount);
    const droppedCount = rounds.length - keepCount;
    // Summary marker so the LLM knows context was trimmed
    const summaryMessage = {
        __typename: "ConversationMessage",
        role: Types.ConversationRoleTypes.System,
        message: `[Context management: ${droppedCount} earlier tool calling round(s) were removed to stay within token limits. The most recent ${keepCount} round(s) are preserved below.]`,
        timestamp: new Date().toISOString(),
    };
    return [...header, summaryMessage, ...keptRounds.flat()];
}