# Enhanced Token Usage Tracking Guide
This guide shows how to use the enhanced token usage tracking system to monitor and log token consumption at the message, tool, and agent-execution level in production.
## Quick Start
```typescript
import { Thread, Message, Tool, OpenAIThreadDriver, AnthropicThreadDriver } from "@flatfile/improv";

// Initialize driver with token tracking enabled
const driver = new OpenAIThreadDriver({
  model: "gpt-4o",
  // Optional: customize character limits for large contexts
  maxPromptCharacters: 2_000_000, // 2M chars ≈ 400K-800K tokens
});

const thread = new Thread({ driver });

// Add message and send
thread.push(new Message({ role: "user", content: "Hello!" }));
await driver.sendThread(thread);

// Get detailed token usage
const usage = thread.getLastMessageTokenUsage();

console.log("Token Usage:", {
  inputTokens: usage?.inputTokens,
  outputTokens: usage?.outputTokens,
  totalTokens: usage?.totalTokens,
  // OpenAI specific
  cachedTokens: usage?.cachedTokens,
  reasoningTokens: usage?.reasoningTokens,
  // Tool breakdown
  toolBreakdown: usage?.toolTokenBreakdown
});
```
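The per-message call above complements the cumulative `thread.getTokenUsage()` helper, which the execution tracker later in this guide relies on. A minimal sketch of a running-total guardrail, reusing the `thread` from the Quick Start (the 100K threshold is just an illustrative value):

```typescript
// Cumulative usage across the whole thread (all exchanges so far).
const runningUsage = thread.getTokenUsage();
console.log("Thread total tokens so far:", runningUsage.totalTokens);

// Example guardrail: warn before the conversation grows too expensive
if (runningUsage.totalTokens > 100_000) {
  console.warn("Conversation is getting large - consider summarizing or starting a new thread");
}
```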
## Per-Message Token Tracking
Track token usage for each individual message in a conversation:
```typescript
class MessageTokenLogger {
  private conversation: Array<{
    messageId: string;
    content: string;
    role: string;
    tokens: any;
    timestamp: Date;
    cost: number;
  }> = [];

  async sendAndTrack(thread: Thread, message: Message) {
    const messageId = `msg_${Date.now()}`;

    // Send message
    thread.push(message);
    await thread.driver.sendThread(thread);

    // Get token usage for this specific message
    const usage = thread.getLastMessageTokenUsage();

    // Calculate cost (example for OpenAI gpt-4o)
    const inputCost = (usage?.inputTokens || 0) * 0.0025 / 1000; // $2.50/1M
    const outputCost = (usage?.outputTokens || 0) * 0.010 / 1000; // $10.00/1M
    const totalCost = inputCost + outputCost;

    // Log message-level usage
    this.conversation.push({
      messageId,
      content: message.content || "",
      role: message.role,
      tokens: {
        input: usage?.inputTokens,
        output: usage?.outputTokens,
        total: usage?.totalTokens,
        cached: usage?.cachedTokens,
        reasoning: usage?.reasoningTokens,
        tools: usage?.toolTokenBreakdown?.length || 0
      },
      timestamp: new Date(),
      cost: totalCost
    });

    console.log(`Message ${messageId} Token Usage:`, {
      tokens: usage?.totalTokens,
      cost: `$${totalCost.toFixed(6)}`,
      toolsUsed: usage?.toolTokenBreakdown?.length || 0
    });

    return { messageId, usage, cost: totalCost };
  }

  getConversationStats() {
    const totalTokens = this.conversation.reduce((sum, msg) => sum + (msg.tokens.total || 0), 0);
    const totalCost = this.conversation.reduce((sum, msg) => sum + msg.cost, 0);

    return {
      totalMessages: this.conversation.length,
      totalTokens,
      totalCost: `$${totalCost.toFixed(6)}`,
      averageTokensPerMessage: Math.round(totalTokens / this.conversation.length),
      conversationHistory: this.conversation
    };
  }
}
```
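A short usage sketch, assuming the `driver` and `thread` from the Quick Start are in scope (the user prompt is just an example):

```typescript
// Usage sketch for MessageTokenLogger, reusing the Quick Start driver/thread.
const messageLogger = new MessageTokenLogger();

const { messageId, cost } = await messageLogger.sendAndTrack(
  thread,
  new Message({ role: "user", content: "Summarize my last three orders." })
);
console.log(`Tracked ${messageId} at a cost of $${cost.toFixed(6)}`);

// After a few exchanges, pull aggregate stats for the whole conversation
console.log(messageLogger.getConversationStats());
```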
## Per-Tool Token Tracking
Track token usage for each tool call with detailed breakdown:
```typescript
class ToolTokenTracker {
  private toolUsage: Map<string, {
    totalCalls: number;
    totalTokens: number;
    totalCost: number;
    callHistory: Array<{
      callId: string;
      inputTokens: number;
      outputTokens: number;
      executionTime?: number;
      timestamp: Date;
    }>;
  }> = new Map();

  async trackToolUsage(thread: Thread) {
    const usage = thread.getLastMessageTokenUsage();
    const toolBreakdown = usage?.toolTokenBreakdown || [];

    for (const tool of toolBreakdown) {
      const toolName = tool.toolName;
      const callId = tool.toolUseId;

      // Get or create tool stats
      if (!this.toolUsage.has(toolName)) {
        this.toolUsage.set(toolName, {
          totalCalls: 0,
          totalTokens: 0,
          totalCost: 0,
          callHistory: []
        });
      }

      const toolStats = this.toolUsage.get(toolName)!;

      // Calculate cost for this tool call (distribute total message cost)
      const totalMessageCost = this.calculateMessageCost(usage);
      const toolCostRatio = tool.totalTokens / (usage?.totalTokens || 1);
      const toolCost = totalMessageCost * toolCostRatio;

      // Update tool stats
      toolStats.totalCalls++;
      toolStats.totalTokens += tool.totalTokens;
      toolStats.totalCost += toolCost;
      toolStats.callHistory.push({
        callId,
        inputTokens: tool.inputTokens,
        outputTokens: tool.outputTokens,
        executionTime: tool.executionTimeMs,
        timestamp: new Date()
      });

      console.log(`Tool ${toolName} Usage:`, {
        callId,
        tokens: tool.totalTokens,
        cost: `$${toolCost.toFixed(6)}`,
        executionTime: tool.executionTimeMs ? `${tool.executionTimeMs}ms` : 'N/A'
      });
    }
  }

  private calculateMessageCost(usage: any): number {
    // Example for OpenAI - adjust rates for your provider
    const inputCost = (usage?.inputTokens || 0) * 0.0025 / 1000;
    const outputCost = (usage?.outputTokens || 0) * 0.010 / 1000;
    return inputCost + outputCost;
  }

  getToolAnalytics() {
    const toolStats = Array.from(this.toolUsage.entries()).map(([name, stats]) => ({
      toolName: name,
      totalCalls: stats.totalCalls,
      totalTokens: stats.totalTokens,
      totalCost: `$${stats.totalCost.toFixed(6)}`,
      averageTokensPerCall: Math.round(stats.totalTokens / stats.totalCalls),
      averageCostPerCall: `$${(stats.totalCost / stats.totalCalls).toFixed(6)}`,
      recentCalls: stats.callHistory.slice(-5) // Last 5 calls
    }));

    return {
      totalTools: this.toolUsage.size,
      toolStats,
      mostUsedTool: toolStats.reduce((prev, curr) =>
        prev.totalCalls > curr.totalCalls ? prev : curr
      ),
      mostExpensiveTool: toolStats.reduce((prev, curr) =>
        parseFloat(prev.totalCost.replace('$', '')) > parseFloat(curr.totalCost.replace('$', '')) ? prev : curr
      )
    };
  }
}
```
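Call `trackToolUsage` after each model turn that may have executed tools, then pull analytics at the end of the run. A brief usage sketch, assuming the `thread` and `driver` from earlier examples:

```typescript
// Usage sketch for ToolTokenTracker.
const toolTracker = new ToolTokenTracker();

// ...after driver.sendThread(thread) completes a turn that used tools:
await toolTracker.trackToolUsage(thread);

// At the end of the run, inspect which tools dominate token spend
// (getToolAnalytics assumes at least one tool call has been tracked).
const analytics = toolTracker.getToolAnalytics();
console.log("Most used tool:", analytics.mostUsedTool?.toolName);
console.log("Most expensive tool:", analytics.mostExpensiveTool?.toolName);
```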
## Per-Agent Execution Tracking
Track token usage across entire agent execution sessions:
```typescript
class AgentExecutionTracker {
  private executionId: string;
  private startTime: Date;
  private sessions: Array<{
    sessionId: string;
    agentName: string;
    totalTokens: number;
    totalCost: number;
    duration: number;
    messagesExchanged: number;
    toolsUsed: number;
    provider: string;
    model: string;
  }> = [];

  constructor(agentName: string, provider: string, model: string) {
    this.executionId = `exec_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
    this.startTime = new Date();

    console.log(`Starting agent execution: ${this.executionId}`);
    console.log(`Agent: ${agentName} | Provider: ${provider} | Model: ${model}`);
  }

  async trackExecution(
    agentName: string,
    thread: Thread,
    provider: string,
    model: string,
    sessionCallback: () => Promise<void>
  ) {
    const sessionId = `session_${Date.now()}`;
    const sessionStart = Date.now();

    // Get initial token state
    const initialUsage = thread.getTokenUsage();
    const initialMessages = thread.all().length;

    console.log(`Session ${sessionId} started - Initial state:`, {
      existingTokens: initialUsage.totalTokens,
      existingMessages: initialMessages
    });

    // Execute the session
    await sessionCallback();

    // Get final token state
    const finalUsage = thread.getTokenUsage();
    const finalMessages = thread.all().length;

    // Calculate session metrics
    const sessionTokens = finalUsage.totalTokens - initialUsage.totalTokens;
    const sessionMessages = finalMessages - initialMessages;
    const duration = Date.now() - sessionStart;

    // Count tools used in this session
    const toolsUsed = this.countToolsInSession(thread, initialMessages);

    // Calculate cost based on provider
    const sessionCost = this.calculateSessionCost(provider, sessionTokens);

    // Store session data
    this.sessions.push({
      sessionId,
      agentName,
      totalTokens: sessionTokens,
      totalCost: sessionCost,
      duration,
      messagesExchanged: sessionMessages,
      toolsUsed,
      provider,
      model
    });

    console.log(`Session ${sessionId} completed:`, {
      duration: `${duration}ms`,
      tokens: sessionTokens,
      messages: sessionMessages,
      tools: toolsUsed,
      cost: `$${sessionCost.toFixed(6)}`
    });

    return {
      sessionId,
      metrics: {
        tokens: sessionTokens,
        cost: sessionCost,
        duration,
        messages: sessionMessages,
        tools: toolsUsed
      }
    };
  }

  private countToolsInSession(thread: Thread, fromMessage: number): number {
    let toolCount = 0;
    const messages = thread.all().slice(fromMessage);

    for (const message of messages) {
      if (message.toolCalls?.length) {
        toolCount += message.toolCalls.length;
      }
    }

    return toolCount;
  }

  private calculateSessionCost(provider: string, tokens: number): number {
    // Simplified cost calculation - adjust rates for your providers
    const rates = {
      openai: { input: 0.0025, output: 0.010 },   // gpt-4o rates per 1K tokens
      anthropic: { input: 0.003, output: 0.015 }  // claude-3-5 rates per 1K tokens
    };

    const rate = rates[provider as keyof typeof rates] || rates.openai;

    // Rough estimate assuming 70% input, 30% output
    return (tokens * 0.7 * rate.input + tokens * 0.3 * rate.output) / 1000;
  }

  getExecutionSummary() {
    const totalTokens = this.sessions.reduce((sum, s) => sum + s.totalTokens, 0);
    const totalCost = this.sessions.reduce((sum, s) => sum + s.totalCost, 0);
    const totalDuration = Date.now() - this.startTime.getTime();
    const totalMessages = this.sessions.reduce((sum, s) => sum + s.messagesExchanged, 0);
    const totalTools = this.sessions.reduce((sum, s) => sum + s.toolsUsed, 0);

    return {
      executionId: this.executionId,
      summary: {
        totalSessions: this.sessions.length,
        totalDuration: `${totalDuration}ms`,
        totalTokens,
        totalCost: `$${totalCost.toFixed(6)}`,
        totalMessages,
        totalTools,
        averageTokensPerSession: Math.round(totalTokens / this.sessions.length),
        averageCostPerSession: `$${(totalCost / this.sessions.length).toFixed(6)}`
      },
      sessions: this.sessions,
      insights: {
        mostActiveSession: this.sessions.reduce((prev, curr) =>
          prev.totalTokens > curr.totalTokens ? prev : curr
        ),
        longestSession: this.sessions.reduce((prev, curr) =>
          prev.duration > curr.duration ? prev : curr
        ),
        averageResponseTime: Math.round(totalDuration / totalMessages)
      }
    };
  }

  // Export data for external logging/monitoring systems
  exportForLogging() {
    return {
      timestamp: new Date().toISOString(),
      executionId: this.executionId,
      ...this.getExecutionSummary(),
      // Add any additional metadata for your logging system
      environment: process.env.NODE_ENV,
      version: "1.0.0" // Your app version
    };
  }
}
```
## Complete Production Example
Here's how to combine all three tracking levels:
```typescript
import { Thread, Message, Tool, OpenAIThreadDriver } from "@flatfile/improv";
import { z } from "zod";

async function productionAgentExample() {
  // Initialize tracking
  const agentTracker = new AgentExecutionTracker("CustomerServiceBot", "openai", "gpt-4o");
  const messageLogger = new MessageTokenLogger();
  const toolTracker = new ToolTokenTracker();

  // Set up driver with large context support
  const driver = new OpenAIThreadDriver({
    model: "gpt-4o",
    maxPromptCharacters: 2_000_000, // Support up to 2M characters
  });

  const thread = new Thread({ driver });

  // Define tools
  const searchTool = new Tool({
    name: "search_knowledge_base",
    description: "Search internal knowledge base",
    parameters: z.object({
      query: z.string(),
      category: z.string().optional()
    }),
    handler: async ({ query, category }) => {
      // Simulate knowledge base search
      return { results: [`Result for ${query} in ${category}`] };
    }
  });

  thread.addTool(searchTool);

  // Track entire agent session
  await agentTracker.trackExecution(
    "CustomerServiceBot",
    thread,
    "openai",
    "gpt-4o",
    async () => {
      // Simulate customer conversation
      const userMessages = [
        "I need help with my account",
        "I can't log in to my dashboard",
        "What are the system requirements?"
      ];

      for (const userMessage of userMessages) {
        // Send user message with per-message tracking.
        // sendAndTrack pushes the message, sends the thread, and records the
        // token usage and cost of the resulting assistant response.
        await messageLogger.sendAndTrack(
          thread,
          new Message({ role: "user", content: userMessage })
        );

        // Track token usage of any tool calls made while answering
        await toolTracker.trackToolUsage(thread);
      }
    }
  );

  // Get comprehensive analytics
  console.log("\nEXECUTION SUMMARY");
  console.log("=".repeat(50));

  const executionSummary = agentTracker.getExecutionSummary();
  console.log("Agent Execution:", executionSummary.summary);

  const conversationStats = messageLogger.getConversationStats();
  console.log("Conversation Stats:", conversationStats);

  const toolAnalytics = toolTracker.getToolAnalytics();
  console.log("Tool Usage:", toolAnalytics);

  // Export for monitoring systems
  const exportData = agentTracker.exportForLogging();

  // Send to your logging/monitoring service
  // await sendToDatadog(exportData);
  // await sendToNewRelic(exportData);
  // await logToFile(exportData);

  console.log("\nExport data ready for logging systems");

  return exportData;
}

// Run the example
productionAgentExample().catch(console.error);
```
## Integration with Monitoring Systems
### DataDog Integration
```typescript
// hot-shots is a commonly used DataDog StatsD client; substitute your own client if you prefer
import { StatsD } from "hot-shots";

const dogstatsd = new StatsD();

function trackTokenUsage(usage: any, tags: string[]) {
  dogstatsd.histogram('agent.tokens.input', usage.inputTokens, tags);
  dogstatsd.histogram('agent.tokens.output', usage.outputTokens, tags);
  dogstatsd.histogram('agent.tokens.total', usage.totalTokens, tags);

  if (usage.cachedTokens) {
    dogstatsd.histogram('agent.tokens.cached', usage.cachedTokens, tags);
  }

  if (usage.reasoningTokens) {
    dogstatsd.histogram('agent.tokens.reasoning', usage.reasoningTokens, tags);
  }
}
```
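For example, after a tracked exchange you can forward the last message's usage along with provider and model tags. A brief sketch, assuming the `thread` from earlier examples; the metric tags are illustrative:

```typescript
// Illustrative wiring: forward the last exchange's usage to DataDog.
const lastUsage = thread.getLastMessageTokenUsage();
if (lastUsage) {
  trackTokenUsage(lastUsage, [
    "agent:CustomerServiceBot", // example tag values
    "provider:openai",
    "model:gpt-4o",
  ]);
}
```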
### Custom Logging
```typescript
import winston from 'winston';
const logger = winston.createLogger({
  format: winston.format.json(),
  transports: [
    new winston.transports.File({ filename: 'token-usage.log' }),
    new winston.transports.Console()
  ]
});

function logTokenUsage(level: 'message' | 'tool' | 'execution', data: any) {
  logger.info(`Token Usage - ${level}`, {
    timestamp: new Date().toISOString(),
    level,
    ...data
  });
}
```
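For example, the trackers from this guide can feed the logger at each level (assuming the `messageLogger`, `toolTracker`, and `agentTracker` instances from the production example are in scope):

```typescript
// Illustrative wiring of the three tracking levels into the winston logger.
logTokenUsage("message", messageLogger.getConversationStats());
logTokenUsage("tool", toolTracker.getToolAnalytics());
logTokenUsage("execution", agentTracker.exportForLogging());
```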
This comprehensive tracking system gives you full visibility into token usage at every level of your AI agent operations!