@buger/probe-chat
CLI and web interface for Probe code search (formerly @buger/probe-web and @buger/probe-chat)
JavaScript
import { get_encoding } from 'tiktoken';
/**
* TokenCounter class to track token usage in the chat
*/
export class TokenCounter {
constructor() {
// Initialize the tokenizer with the cl100k_base encoding (the GPT-3.5/4 encoding,
// used here as a reasonable approximation for Claude models as well)
try {
this.tokenizer = get_encoding('cl100k_base');
// Context window tracking
this.contextSize = 0; // Current size based on history
this.history = []; // Store message history for context calculation
// Token counters
this.requestTokens = 0; // Total prompt tokens over session
this.responseTokens = 0; // Total completion tokens over session
this.currentRequestTokens = 0; // Prompt tokens for the current LLM call
this.currentResponseTokens = 0; // Completion tokens for the current LLM call
// Cache token tracking
this.cacheCreationTokens = 0; // Total Anthropic cache creation tokens
this.cacheReadTokens = 0; // Total Anthropic cache read tokens
this.currentCacheCreationTokens = 0; // Anthropic cache creation for current call
this.currentCacheReadTokens = 0; // Anthropic cache read for current call
this.cachedPromptTokens = 0; // Total OpenAI cached prompt tokens
this.currentCachedPromptTokens = 0; // OpenAI cached prompt for current call
} catch (error) {
console.error('Error initializing tokenizer:', error);
// Fallback to a simple token counting method if tiktoken fails
this.tokenizer = null;
this.contextSize = 0;
this.requestTokens = 0;
this.responseTokens = 0;
this.currentRequestTokens = 0;
this.currentResponseTokens = 0;
this.cacheCreationTokens = 0;
this.cacheReadTokens = 0;
this.currentCacheCreationTokens = 0;
this.currentCacheReadTokens = 0;
this.cachedPromptTokens = 0;
this.currentCachedPromptTokens = 0;
this.history = [];
}
this.debug = process.env.DEBUG_CHAT === '1';
}
/**
* Count tokens in a string using tiktoken or fallback method
* @param {string} text - The text to count tokens for
* @returns {number} - The number of tokens
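* @example
* // Rough sketch: the exact count depends on the encoding. With cl100k_base,
* // 'hello world' is 2 tokens; the fallback approximation would give ceil(11 / 4) = 3.
* const counter = new TokenCounter();
* counter.countTokens('hello world');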
*/
countTokens(text) {
if (typeof text !== 'string') {
text = String(text); // Ensure text is a string
}
if (this.tokenizer) {
try {
const tokens = this.tokenizer.encode(text);
return tokens.length;
} catch (error) {
// Encoding failed for this input; fall back to a simple approximation (1 token ≈ 4 characters)
return Math.ceil(text.length / 4);
}
} else {
// Fallback to a simple approximation (1 token ≈ 4 characters)
return Math.ceil(text.length / 4);
}
}
/**
* Add to the request token count (manual counting; prefer recordUsage when provider usage data is available)
* @param {string|number} input - The text to count tokens for or the token count directly
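* @example
* // Sketch: both input forms are accepted. Prefer recordUsage() when the provider
* // reports usage, since that also manages the per-call counters.
* counter.addRequestTokens('You are a helpful assistant.'); // counts the string
* counter.addRequestTokens(1234); // or pass a precomputed token count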
*/
addRequestTokens(input) {
let tokenCount = 0;
if (typeof input === 'number') {
tokenCount = input;
} else if (typeof input === 'string') {
tokenCount = this.countTokens(input);
} else {
console.warn('[WARN] Invalid input type for addRequestTokens:', typeof input);
return;
}
// Accumulate into the session total. `recordUsage` is preferred when the LLM
// response includes usage data; use this method when it does not.
this.requestTokens += tokenCount;
// Also set the current-call count, on the assumption this is called at the start of a turn.
// A later `recordUsage` call for the same turn will overwrite it.
this.currentRequestTokens = tokenCount;
if (this.debug) {
console.log(`[DEBUG] (Manual) Added ${tokenCount} request tokens. Total: ${this.requestTokens}, Current: ${this.currentRequestTokens}`);
}
}
/**
* Add to the response token count (manual counting; prefer recordUsage when provider usage data is available)
* @param {string|number} input - The text to count tokens for or the token count directly
*/
addResponseTokens(input) {
let tokenCount = 0;
if (typeof input === 'number') {
tokenCount = input;
} else if (typeof input === 'string') {
tokenCount = this.countTokens(input);
} else {
console.warn('[WARN] Invalid input type for addResponseTokens:', typeof input);
return;
}
this.responseTokens += tokenCount;
// Update current response tokens, assuming this is called when usage info is missing.
this.currentResponseTokens = tokenCount;
if (this.debug) {
console.log(`[DEBUG] (Manual) Added ${tokenCount} response tokens. Total: ${this.responseTokens}, Current: ${this.currentResponseTokens}`);
}
}
/**
* Record token usage from the AI SDK's result for a single LLM call.
* This resets 'current' counters and updates totals.
* @param {Object} usage - The usage object { promptTokens, completionTokens, totalTokens }
* @param {Object} providerMetadata - Metadata possibly containing cache info
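* @example
* // Sketch assuming a Vercel AI SDK generateText() result with the shape described
* // above; the exact field names are an assumption and may vary by SDK version.
* const result = await generateText({ model, messages });
* counter.recordUsage(result.usage, result.providerMetadata);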
*/
recordUsage(usage, providerMetadata) {
if (!usage) {
console.warn('[WARN] No usage information provided to recordUsage');
// Without usage data there is nothing to record; callers can fall back to
// addRequestTokens/addResponseTokens for manual counting.
return;
}
// --- Reset CURRENT counters for this specific API call ---
this.currentRequestTokens = 0;
this.currentResponseTokens = 0;
this.currentCacheCreationTokens = 0;
this.currentCacheReadTokens = 0;
this.currentCachedPromptTokens = 0;
// --- Process usage data ---
const promptTokens = Number(usage.promptTokens) || 0;
const completionTokens = Number(usage.completionTokens) || 0;
// Update CURRENT tokens for this call
this.currentRequestTokens = promptTokens;
this.currentResponseTokens = completionTokens;
// Update TOTAL tokens accumulated over the session
this.requestTokens += promptTokens;
this.responseTokens += completionTokens;
// --- Process Provider Metadata for Cache Info ---
if (providerMetadata?.anthropic) {
const cacheCreation = Number(providerMetadata.anthropic.cacheCreationInputTokens) || 0;
const cacheRead = Number(providerMetadata.anthropic.cacheReadInputTokens) || 0;
this.currentCacheCreationTokens = cacheCreation;
this.currentCacheReadTokens = cacheRead;
this.cacheCreationTokens += cacheCreation;
this.cacheReadTokens += cacheRead;
if (this.debug) {
console.log(`[DEBUG] Anthropic cache tokens (current): creation=${cacheCreation}, read=${cacheRead}`);
}
}
if (providerMetadata?.openai) {
const cachedPrompt = Number(providerMetadata.openai.cachedPromptTokens) || 0;
this.currentCachedPromptTokens = cachedPrompt;
this.cachedPromptTokens += cachedPrompt;
if (this.debug) {
console.log(`[DEBUG] OpenAI cached prompt tokens (current): ${cachedPrompt}`);
}
}
// Note: We don't force context recalculation here.
// It should be done explicitly after history is updated.
if (this.debug) {
console.log(
`[DEBUG] Recorded usage: current(req=${this.currentRequestTokens}, resp=${this.currentResponseTokens}), total(req=${this.requestTokens}, resp=${this.responseTokens})`
);
// Log cache totals
console.log(`[DEBUG] Total cache tokens: Anthropic(create=${this.cacheCreationTokens}, read=${this.cacheReadTokens}), OpenAI(prompt=${this.cachedPromptTokens})`);
}
}
/**
* Calculate the current context window size based on provided messages or internal history.
* @param {Array|null} messages - Optional messages array to use for calculation. If null, uses internal this.history.
* @returns {number} - Total tokens estimated in the context window.
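* @example
* // Sketch: counting an explicit message array leaves this.contextSize untouched.
* const size = counter.calculateContextSize([
*   { role: 'system', content: 'You are a code search assistant.' },
*   { role: 'user', content: 'Find the TokenCounter class.' },
* ]);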
*/
calculateContextSize(messages = null) {
const msgsToCount = messages !== null ? messages : this.history;
let totalTokens = 0;
if (this.debug && messages === null) {
// Log only when using internal history to avoid spamming during loops using local messages array
console.log(`[DEBUG] Calculating context size from internal history (${this.history.length} messages)`);
}
for (const msg of msgsToCount) {
let messageTokens = 0;
// Per-message overhead for role/structure. ~4 tokens per message is the common
// estimate for OpenAI-style chat formats; other providers may differ slightly.
messageTokens += 4;
// Content tokens
if (typeof msg.content === 'string') {
messageTokens += this.countTokens(msg.content);
} else if (Array.isArray(msg.content)) {
// Handle array content (e.g., Vercel AI SDK tool usage format)
for (const item of msg.content) {
if (item.type === 'text' && typeof item.text === 'string') {
messageTokens += this.countTokens(item.text);
} else {
// Estimate tokens for non-text parts (tool calls/results embedded)
messageTokens += this.countTokens(JSON.stringify(item));
}
}
} else if (msg.content) {
// Fallback for other content types
messageTokens += this.countTokens(JSON.stringify(msg.content));
}
// --- Add tokens for tool calls/results if present (Vercel SDK format) ---
// These might exist in 'assistant' or 'tool' messages depending on SDK version/usage
if (msg.toolCalls) {
messageTokens += this.countTokens(JSON.stringify(msg.toolCalls));
messageTokens += 5; // Approx overhead for tool_calls structure
}
// For 'tool' role messages (results)
if (msg.role === 'tool' && msg.toolCallId) {
messageTokens += this.countTokens(msg.toolCallId); // Add tokens for the ID
messageTokens += 5; // Approx overhead for tool role structure
// Content is already counted above
}
// Deprecated? Check if toolCallResults is still used
if (msg.toolCallResults) {
messageTokens += this.countTokens(JSON.stringify(msg.toolCallResults));
messageTokens += 5; // Approx overhead
}
// --- End Vercel SDK specific ---
totalTokens += messageTokens;
// Per-message token logging is intentionally omitted here; it is too noisy even in debug mode.
}
// Update the instance property *only* if calculating based on internal history
if (messages === null) {
this.contextSize = totalTokens;
if (this.debug) {
console.log(`[DEBUG] Updated internal context size: ${this.contextSize} tokens`);
}
}
return totalTokens;
}
/**
* Update internal history and recalculate internal context window size.
* @param {Array} messages - New message history array.
*/
updateHistory(messages) {
// Ensure messages is an array
if (!Array.isArray(messages)) {
console.warn("[WARN] updateHistory called with non-array:", messages);
this.history = [];
} else {
// Create a shallow copy to avoid external modifications
this.history = [...messages];
}
// Recalculate context size based on the new internal history
this.calculateContextSize(); // This updates this.contextSize
if (this.debug) {
console.log(`[DEBUG] History updated (${this.history.length} messages). Recalculated context size: ${this.contextSize}`);
}
}
/**
* Clear all counters and internal history. Reset context size.
*/
clear() {
// Reset counters
this.requestTokens = 0;
this.responseTokens = 0;
this.currentRequestTokens = 0;
this.currentResponseTokens = 0;
this.cacheCreationTokens = 0;
this.cacheReadTokens = 0;
this.currentCacheCreationTokens = 0;
this.currentCacheReadTokens = 0;
this.cachedPromptTokens = 0;
this.currentCachedPromptTokens = 0;
// Clear history and context
this.history = [];
this.contextSize = 0; // Reset calculated context size
if (this.debug) {
console.log('[DEBUG] TokenCounter cleared: usage, history, and context size reset.');
}
}
/**
* Start a new conversation turn - reset CURRENT token counters.
* Calculates context size based on history *before* the new turn.
*/
startNewTurn() {
this.currentRequestTokens = 0;
this.currentResponseTokens = 0;
this.currentCacheCreationTokens = 0;
this.currentCacheReadTokens = 0;
this.currentCachedPromptTokens = 0;
// Calculate context size based on current history *before* new messages are added
this.calculateContextSize(); // Updates this.contextSize
if (this.debug) {
console.log('[DEBUG] TokenCounter: New turn started. Current counters reset.');
console.log(`[DEBUG] Context size at start of turn: ${this.contextSize} tokens`);
}
}
/**
* Get the current token usage state including context size.
* Recalculates context size from internal history before returning.
* @returns {Object} - Object containing current turn, total session, and context window usage.
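* @example
* // Sketch of reading the returned shape; see the object literal below for all fields.
* const usage = counter.getTokenUsage();
* console.log(usage.contextWindow);    // estimated tokens currently in the context window
* console.log(usage.current.total);    // request + response for the last recorded call
* console.log(usage.total.cacheTotal); // session-wide cache read + write tokens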
*/
getTokenUsage() {
// Always calculate context window size from internal history right before returning usage
const currentContextSize = this.calculateContextSize(); // Recalculates and updates this.contextSize
// Consolidate cache info for simpler reporting
const currentCacheRead = this.currentCacheReadTokens + this.currentCachedPromptTokens;
const currentCacheWrite = this.currentCacheCreationTokens;
const totalCacheRead = this.cacheReadTokens + this.cachedPromptTokens;
const totalCacheWrite = this.cacheCreationTokens;
const usageData = {
contextWindow: currentContextSize, // Use the freshly calculated value
current: { // Usage for the *last* LLM call recorded
request: this.currentRequestTokens,
response: this.currentResponseTokens,
total: this.currentRequestTokens + this.currentResponseTokens,
cacheRead: currentCacheRead,
cacheWrite: currentCacheWrite,
cacheTotal: currentCacheRead + currentCacheWrite,
// Keep detailed breakdown if needed
anthropic: {
cacheCreation: this.currentCacheCreationTokens,
cacheRead: this.currentCacheReadTokens,
},
openai: {
cachedPrompt: this.currentCachedPromptTokens
}
},
total: { // Accumulated usage over the session
request: this.requestTokens,
response: this.responseTokens,
total: this.requestTokens + this.responseTokens,
cacheRead: totalCacheRead,
cacheWrite: totalCacheWrite,
cacheTotal: totalCacheRead + totalCacheWrite,
// Keep detailed breakdown if needed
anthropic: {
cacheCreation: this.cacheCreationTokens,
cacheRead: this.cacheReadTokens,
},
openai: {
cachedPrompt: this.cachedPromptTokens
}
}
};
// Full usage-object logging is intentionally omitted, even in debug mode: getTokenUsage()
// is called frequently and dumping the whole object would be too verbose.
return usageData;
}
}
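/*
 * Example usage (illustrative sketch, not executed by this module). It assumes a
 * chat loop built on the Vercel AI SDK, where `generateText` returns `usage` and
 * `providerMetadata` as described in recordUsage() above; adjust names to your setup.
 *
 *   const counter = new TokenCounter();
 *   const messages = [{ role: 'user', content: 'How does TokenCounter work?' }];
 *
 *   counter.startNewTurn();                                  // reset per-call counters
 *   const result = await generateText({ model, messages });  // LLM call (assumed API)
 *   counter.recordUsage(result.usage, result.providerMetadata);
 *
 *   messages.push({ role: 'assistant', content: result.text });
 *   counter.updateHistory(messages);                         // recalculates context size
 *   console.log(counter.getTokenUsage());                    // { contextWindow, current, total }
 */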