# Enhanced Token Usage Tracking Guide
This guide shows how to use the enhanced token usage tracking system to monitor and log token consumption at the message, tool, and agent-execution level in production.
## Quick Start
```typescript
import { Thread, Message, Tool, OpenAIThreadDriver, AnthropicThreadDriver } from "@flatfile/improv";

// Initialize driver with token tracking enabled
const driver = new OpenAIThreadDriver({
  model: "gpt-4o",
  // Optional: customize character limits for large contexts
  maxPromptCharacters: 2_000_000, // 2M chars ≈ 400K-800K tokens
});

const thread = new Thread({ driver });

// Add message and send
thread.push(new Message({ role: "user", content: "Hello!" }));
await driver.sendThread(thread);

// Get detailed token usage
const usage = thread.getLastMessageTokenUsage();

console.log("Token Usage:", {
  inputTokens: usage?.inputTokens,
  outputTokens: usage?.outputTokens,
  totalTokens: usage?.totalTokens,
  // OpenAI specific
  cachedTokens: usage?.cachedTokens,
  reasoningTokens: usage?.reasoningTokens,
  // Tool breakdown
  toolBreakdown: usage?.toolTokenBreakdown
});
```
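The per-message call above complements the cumulative `thread.getTokenUsage()` helper, which the execution tracker later in this guide relies on. A minimal sketch of a running-total guardrail, reusing the `thread` from the Quick Start (the 100K threshold is just an illustrative value):

```typescript
// Cumulative usage across the whole thread (all exchanges so far).
const runningUsage = thread.getTokenUsage();
console.log("Thread total tokens so far:", runningUsage.totalTokens);

// Example guardrail: warn before the conversation grows too expensive
if (runningUsage.totalTokens > 100_000) {
  console.warn("Conversation is getting large - consider summarizing or starting a new thread");
}
```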
## Per-Message Token Tracking
Track token usage for each individual message in a conversation:
```typescript
class MessageTokenLogger {
  private conversation: Array<{
    messageId: string;
    content: string;
    role: string;
    tokens: any;
    timestamp: Date;
    cost: number;
  }> = [];

  async sendAndTrack(thread: Thread, message: Message) {
    const messageId = `msg_${Date.now()}`;

    // Send message
    thread.push(message);
    await thread.driver.sendThread(thread);

    // Get token usage for this specific message
    const usage = thread.getLastMessageTokenUsage();

    // Calculate cost (example for OpenAI gpt-4o)
    const inputCost = (usage?.inputTokens || 0) * 0.0025 / 1000; // $2.50/1M
    const outputCost = (usage?.outputTokens || 0) * 0.010 / 1000; // $10.00/1M
    const totalCost = inputCost + outputCost;

    // Log message-level usage
    this.conversation.push({
      messageId,
      content: message.content || "",
      role: message.role,
      tokens: {
        input: usage?.inputTokens,
        output: usage?.outputTokens,
        total: usage?.totalTokens,
        cached: usage?.cachedTokens,
        reasoning: usage?.reasoningTokens,
        tools: usage?.toolTokenBreakdown?.length || 0
      },
      timestamp: new Date(),
      cost: totalCost
    });

    console.log(`Message ${messageId} Token Usage:`, {
      tokens: usage?.totalTokens,
      cost: `$${totalCost.toFixed(6)}`,
      toolsUsed: usage?.toolTokenBreakdown?.length || 0
    });

    return { messageId, usage, cost: totalCost };
  }

  getConversationStats() {
    const totalTokens = this.conversation.reduce((sum, msg) => sum + (msg.tokens.total || 0), 0);
    const totalCost = this.conversation.reduce((sum, msg) => sum + msg.cost, 0);

    return {
      totalMessages: this.conversation.length,
      totalTokens,
      totalCost: `$${totalCost.toFixed(6)}`,
      averageTokensPerMessage: Math.round(totalTokens / this.conversation.length),
      conversationHistory: this.conversation
    };
  }
}
```
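A short usage sketch, assuming the `driver` and `thread` from the Quick Start are in scope (the user prompt is just an example):

```typescript
// Usage sketch for MessageTokenLogger, reusing the Quick Start driver/thread.
const messageLogger = new MessageTokenLogger();

const { messageId, cost } = await messageLogger.sendAndTrack(
  thread,
  new Message({ role: "user", content: "Summarize my last three orders." })
);
console.log(`Tracked ${messageId} at a cost of $${cost.toFixed(6)}`);

// After a few exchanges, pull aggregate stats for the whole conversation
console.log(messageLogger.getConversationStats());
```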
## Per-Tool Token Tracking
Track token usage for each tool call with detailed breakdown:
```typescript
class ToolTokenTracker {
  private toolUsage: Map<string, {
    totalCalls: number;
    totalTokens: number;
    totalCost: number;
    callHistory: Array<{
      callId: string;
      inputTokens: number;
      outputTokens: number;
      executionTime?: number;
      timestamp: Date;
    }>;
  }> = new Map();

  async trackToolUsage(thread: Thread) {
    const usage = thread.getLastMessageTokenUsage();
    const toolBreakdown = usage?.toolTokenBreakdown || [];

    for (const tool of toolBreakdown) {
      const toolName = tool.toolName;
      const callId = tool.toolUseId;

      // Get or create tool stats
      if (!this.toolUsage.has(toolName)) {
        this.toolUsage.set(toolName, {
          totalCalls: 0,
          totalTokens: 0,
          totalCost: 0,
          callHistory: []
        });
      }

      const toolStats = this.toolUsage.get(toolName)!;

      // Calculate cost for this tool call (distribute total message cost)
      const totalMessageCost = this.calculateMessageCost(usage);
      const toolCostRatio = tool.totalTokens / (usage?.totalTokens || 1);
      const toolCost = totalMessageCost * toolCostRatio;

      // Update tool stats
      toolStats.totalCalls++;
      toolStats.totalTokens += tool.totalTokens;
      toolStats.totalCost += toolCost;
      toolStats.callHistory.push({
        callId,
        inputTokens: tool.inputTokens,
        outputTokens: tool.outputTokens,
        executionTime: tool.executionTimeMs,
        timestamp: new Date()
      });

      console.log(`Tool ${toolName} Usage:`, {
        callId,
        tokens: tool.totalTokens,
        cost: `$${toolCost.toFixed(6)}`,
        executionTime: tool.executionTimeMs ? `${tool.executionTimeMs}ms` : 'N/A'
      });
    }
  }

  private calculateMessageCost(usage: any): number {
    // Example for OpenAI - adjust rates for your provider
    const inputCost = (usage?.inputTokens || 0) * 0.0025 / 1000;
    const outputCost = (usage?.outputTokens || 0) * 0.010 / 1000;
    return inputCost + outputCost;
  }

  getToolAnalytics() {
    const toolStats = Array.from(this.toolUsage.entries()).map(([name, stats]) => ({
      toolName: name,
      totalCalls: stats.totalCalls,
      totalTokens: stats.totalTokens,
      totalCost: `$${stats.totalCost.toFixed(6)}`,
      averageTokensPerCall: Math.round(stats.totalTokens / stats.totalCalls),
      averageCostPerCall: `$${(stats.totalCost / stats.totalCalls).toFixed(6)}`,
      recentCalls: stats.callHistory.slice(-5) // Last 5 calls
    }));

    return {
      totalTools: this.toolUsage.size,
      toolStats,
      mostUsedTool: toolStats.reduce((prev, curr) =>
        prev.totalCalls > curr.totalCalls ? prev : curr
      ),
      mostExpensiveTool: toolStats.reduce((prev, curr) =>
        parseFloat(prev.totalCost.replace('$', '')) > parseFloat(curr.totalCost.replace('$', '')) ? prev : curr
      )
    };
  }
}
```
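Call `trackToolUsage` after each model turn that may have executed tools, then pull analytics at the end of the run. A brief usage sketch, assuming the `thread` and `driver` from earlier examples:

```typescript
// Usage sketch for ToolTokenTracker.
const toolTracker = new ToolTokenTracker();

// ...after driver.sendThread(thread) completes a turn that used tools:
await toolTracker.trackToolUsage(thread);

// At the end of the run, inspect which tools dominate token spend
// (getToolAnalytics assumes at least one tool call has been tracked).
const analytics = toolTracker.getToolAnalytics();
console.log("Most used tool:", analytics.mostUsedTool?.toolName);
console.log("Most expensive tool:", analytics.mostExpensiveTool?.toolName);
```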
## Per-Agent Execution Tracking
Track token usage across entire agent execution sessions:
```typescript
class AgentExecutionTracker {
  private executionId: string;
  private startTime: Date;
  private sessions: Array<{
    sessionId: string;
    agentName: string;
    totalTokens: number;
    totalCost: number;
    duration: number;
    messagesExchanged: number;
    toolsUsed: number;
    provider: string;
    model: string;
  }> = [];

  constructor(agentName: string, provider: string, model: string) {
    this.executionId = `exec_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
    this.startTime = new Date();

    console.log(`Starting agent execution: ${this.executionId}`);
    console.log(`Agent: ${agentName} | Provider: ${provider} | Model: ${model}`);
  }

  async trackExecution(
    agentName: string,
    thread: Thread,
    provider: string,
    model: string,
    sessionCallback: () => Promise<void>
  ) {
    const sessionId = `session_${Date.now()}`;
    const sessionStart = Date.now();

    // Get initial token state
    const initialUsage = thread.getTokenUsage();
    const initialMessages = thread.all().length;

    console.log(`Session ${sessionId} started - Initial state:`, {
      existingTokens: initialUsage.totalTokens,
      existingMessages: initialMessages
    });

    // Execute the session
    await sessionCallback();

    // Get final token state
    const finalUsage = thread.getTokenUsage();
    const finalMessages = thread.all().length;

    // Calculate session metrics
    const sessionTokens = finalUsage.totalTokens - initialUsage.totalTokens;
    const sessionMessages = finalMessages - initialMessages;
    const duration = Date.now() - sessionStart;

    // Count tools used in this session
    const toolsUsed = this.countToolsInSession(thread, initialMessages);

    // Calculate cost based on provider
    const sessionCost = this.calculateSessionCost(provider, sessionTokens);

    // Store session data
    this.sessions.push({
      sessionId,
      agentName,
      totalTokens: sessionTokens,
      totalCost: sessionCost,
      duration,
      messagesExchanged: sessionMessages,
      toolsUsed,
      provider,
      model
    });

    console.log(`Session ${sessionId} completed:`, {
      duration: `${duration}ms`,
      tokens: sessionTokens,
      messages: sessionMessages,
      tools: toolsUsed,
      cost: `$${sessionCost.toFixed(6)}`
    });

    return {
      sessionId,
      metrics: {
        tokens: sessionTokens,
        cost: sessionCost,
        duration,
        messages: sessionMessages,
        tools: toolsUsed
      }
    };
  }

  private countToolsInSession(thread: Thread, fromMessage: number): number {
    let toolCount = 0;
    const messages = thread.all().slice(fromMessage);

    for (const message of messages) {
      if (message.toolCalls?.length) {
        toolCount += message.toolCalls.length;
      }
    }

    return toolCount;
  }

  private calculateSessionCost(provider: string, tokens: number): number {
    // Simplified cost calculation - adjust rates for your providers
    const rates = {
      openai: { input: 0.0025, output: 0.010 },   // gpt-4o rates per 1K tokens
      anthropic: { input: 0.003, output: 0.015 }  // claude-3-5 rates per 1K tokens
    };

    const rate = rates[provider as keyof typeof rates] || rates.openai;

    // Rough estimate assuming 70% input, 30% output
    return (tokens * 0.7 * rate.input + tokens * 0.3 * rate.output) / 1000;
  }

  getExecutionSummary() {
    const totalTokens = this.sessions.reduce((sum, s) => sum + s.totalTokens, 0);
    const totalCost = this.sessions.reduce((sum, s) => sum + s.totalCost, 0);
    const totalDuration = Date.now() - this.startTime.getTime();
    const totalMessages = this.sessions.reduce((sum, s) => sum + s.messagesExchanged, 0);
    const totalTools = this.sessions.reduce((sum, s) => sum + s.toolsUsed, 0);

    return {
      executionId: this.executionId,
      summary: {
        totalSessions: this.sessions.length,
        totalDuration: `${totalDuration}ms`,
        totalTokens,
        totalCost: `$${totalCost.toFixed(6)}`,
        totalMessages,
        totalTools,
        averageTokensPerSession: Math.round(totalTokens / this.sessions.length),
        averageCostPerSession: `$${(totalCost / this.sessions.length).toFixed(6)}`
      },
      sessions: this.sessions,
      insights: {
        mostActiveSession: this.sessions.reduce((prev, curr) =>
          prev.totalTokens > curr.totalTokens ? prev : curr
        ),
        longestSession: this.sessions.reduce((prev, curr) =>
          prev.duration > curr.duration ? prev : curr
        ),
        averageResponseTime: Math.round(totalDuration / totalMessages)
      }
    };
  }

  // Export data for external logging/monitoring systems
  exportForLogging() {
    return {
      timestamp: new Date().toISOString(),
      executionId: this.executionId,
      ...this.getExecutionSummary(),
      // Add any additional metadata for your logging system
      environment: process.env.NODE_ENV,
      version: "1.0.0" // Your app version
    };
  }
}
```
## Complete Production Example
Here's how to combine all three tracking levels:
```typescript
import { Thread, Message, Tool, OpenAIThreadDriver } from "@flatfile/improv";
import { z } from "zod";

async function productionAgentExample() {
  // Initialize tracking
  const agentTracker = new AgentExecutionTracker("CustomerServiceBot", "openai", "gpt-4o");
  const messageLogger = new MessageTokenLogger();
  const toolTracker = new ToolTokenTracker();

  // Set up driver with large context support
  const driver = new OpenAIThreadDriver({
    model: "gpt-4o",
    maxPromptCharacters: 2_000_000, // Support up to 2M characters
  });

  const thread = new Thread({ driver });

  // Define tools
  const searchTool = new Tool({
    name: "search_knowledge_base",
    description: "Search internal knowledge base",
    parameters: z.object({
      query: z.string(),
      category: z.string().optional()
    }),
    handler: async ({ query, category }) => {
      // Simulate knowledge base search
      return { results: [`Result for ${query} in ${category}`] };
    }
  });

  thread.addTool(searchTool);

  // Track entire agent session
  await agentTracker.trackExecution(
    "CustomerServiceBot",
    thread,
    "openai",
    "gpt-4o",
    async () => {
      // Simulate customer conversation
      const userMessages = [
        "I need help with my account",
        "I can't log in to my dashboard",
        "What are the system requirements?"
      ];

      for (const userMessage of userMessages) {
        // Send user message with per-message tracking.
        // sendAndTrack pushes the message, sends the thread, and records the
        // token usage and cost of the resulting assistant response.
        await messageLogger.sendAndTrack(
          thread,
          new Message({ role: "user", content: userMessage })
        );

        // Track token usage of any tool calls made while answering
        await toolTracker.trackToolUsage(thread);
      }
    }
  );

  // Get comprehensive analytics
  console.log("\nEXECUTION SUMMARY");
  console.log("=".repeat(50));

  const executionSummary = agentTracker.getExecutionSummary();
  console.log("Agent Execution:", executionSummary.summary);

  const conversationStats = messageLogger.getConversationStats();
  console.log("Conversation Stats:", conversationStats);

  const toolAnalytics = toolTracker.getToolAnalytics();
  console.log("Tool Usage:", toolAnalytics);

  // Export for monitoring systems
  const exportData = agentTracker.exportForLogging();

  // Send to your logging/monitoring service
  // await sendToDatadog(exportData);
  // await sendToNewRelic(exportData);
  // await logToFile(exportData);

  console.log("\nExport data ready for logging systems");

  return exportData;
}

// Run the example
productionAgentExample().catch(console.error);
```
## Integration with Monitoring Systems
### DataDog Integration
```typescript
// hot-shots is a commonly used DataDog StatsD client; substitute your own client if you prefer
import { StatsD } from "hot-shots";

const dogstatsd = new StatsD();

function trackTokenUsage(usage: any, tags: string[]) {
  dogstatsd.histogram('agent.tokens.input', usage.inputTokens, tags);
  dogstatsd.histogram('agent.tokens.output', usage.outputTokens, tags);
  dogstatsd.histogram('agent.tokens.total', usage.totalTokens, tags);

  if (usage.cachedTokens) {
    dogstatsd.histogram('agent.tokens.cached', usage.cachedTokens, tags);
  }

  if (usage.reasoningTokens) {
    dogstatsd.histogram('agent.tokens.reasoning', usage.reasoningTokens, tags);
  }
}
```
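For example, after a tracked exchange you can forward the last message's usage along with provider and model tags. A brief sketch, assuming the `thread` from earlier examples; the metric tags are illustrative:

```typescript
// Illustrative wiring: forward the last exchange's usage to DataDog.
const lastUsage = thread.getLastMessageTokenUsage();
if (lastUsage) {
  trackTokenUsage(lastUsage, [
    "agent:CustomerServiceBot", // example tag values
    "provider:openai",
    "model:gpt-4o",
  ]);
}
```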
### Custom Logging
```typescript
import winston from 'winston';
const logger = winston.createLogger({
  format: winston.format.json(),
  transports: [
    new winston.transports.File({ filename: 'token-usage.log' }),
    new winston.transports.Console()
  ]
});

function logTokenUsage(level: 'message' | 'tool' | 'execution', data: any) {
  logger.info(`Token Usage - ${level}`, {
    timestamp: new Date().toISOString(),
    level,
    ...data
  });
}
```
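For example, the trackers from this guide can feed the logger at each level (assuming the `messageLogger`, `toolTracker`, and `agentTracker` instances from the production example are in scope):

```typescript
// Illustrative wiring of the three tracking levels into the winston logger.
logTokenUsage("message", messageLogger.getConversationStats());
logTokenUsage("tool", toolTracker.getToolAnalytics());
logTokenUsage("execution", agentTracker.exportForLogging());
```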
This comprehensive tracking system gives you full visibility into token usage at every level of your AI agent operations!