UNPKG

gemini-cost-tracker

Version:

CLI tool to display token usage and costs for Gemini and Vertex AI

318 lines 14.9 kB
import { Logging } from '@google-cloud/logging';
import MonitoringClient from '@google-cloud/monitoring';
import { AppError, ErrorCode } from '../../types/index.js';
import { logger } from '../../utils/logger.js';

/** Rough characters-per-token ratio used when a log entry has no usage metadata. */
const CHARS_PER_TOKEN = 4;

/** Returns true when `value` exposes `getTime()` and that time is not NaN. */
function isValidDate(value) {
    return typeof value?.getTime === 'function' && !Number.isNaN(value.getTime());
}

/** Builds the short random base-36 suffix appended to generated record ids. */
function randomSuffix() {
    return Math.random().toString(36).slice(2, 11);
}

/**
 * Estimates a token count from the JSON size of a payload.
 * A missing payload yields 0 so that empty log entries are not counted.
 */
function estimateTokens(payload) {
    if (!payload) {
        return 0;
    }
    return Math.ceil(JSON.stringify(payload).length / CHARS_PER_TOKEN);
}

/**
 * Reads the numeric value of a monitoring point, accepting either the
 * `doubleValue` or the `int64Value` field. Returns 0 when neither holds a
 * usable finite number.
 */
function readPointValue(value) {
    const raw = value?.doubleValue || value?.int64Value;
    if (!raw) {
        return 0;
    }
    // int64 fields may be delivered as a number, a string, or an object with toNumber().
    const numeric = typeof raw.toNumber === 'function' ? raw.toNumber() : Number(raw);
    return Number.isFinite(numeric) ? numeric : 0;
}

/**
 * Collects Gemini and Vertex AI usage records for a GCP project by querying
 * Cloud Logging and Cloud Monitoring, falling back to generated mock data
 * when an unexpected error occurs during retrieval.
 */
export class RealUsageClient {
    logging;
    monitoring;
    authManager;
    projectId;

    /**
     * @param authManager - object exposing an async `getGcpCredentials()` that
     *   resolves to `{ projectId, keyFile? }`.
     */
    constructor(authManager) {
        this.authManager = authManager;
        this.projectId = '';
    }

    /**
     * Loads GCP credentials from the auth manager and (re)creates the Logging
     * and Monitoring clients.
     *
     * @throws {AppError} `REAL_USAGE_INIT_ERROR` when credentials cannot be
     *   loaded or a client cannot be constructed.
     */
    async initialize() {
        try {
            const gcpCredentials = await this.authManager.getGcpCredentials();
            this.projectId = gcpCredentials.projectId;
            // Initialize Google Cloud clients
            const clientConfig = {
                projectId: this.projectId,
            };
            // Use service account key file if provided
            if (gcpCredentials.keyFile) {
                clientConfig.keyFilename = gcpCredentials.keyFile;
            }
            this.logging = new Logging(clientConfig);
            this.monitoring = new MonitoringClient.MetricServiceClient(clientConfig);
            logger.info('Real usage client initialized', { projectId: this.projectId });
        }
        catch (error) {
            throw new AppError(ErrorCode.REAL_USAGE_INIT_ERROR, `Failed to initialize real usage client: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
    }

    /**
     * Returns usage records between `params.startDate` and `params.endDate`,
     * optionally filtered by `params.model` and `params.project`.
     *
     * @param params - `{ startDate: Date, endDate: Date, model?, project? }`.
     * @returns array of usage records (real data, or fallback mock data when
     *   an unexpected non-AppError failure occurs).
     * @throws {AppError} `VALIDATION_ERROR` for invalid or unordered dates;
     *   any AppError raised by `initialize()` is rethrown unchanged.
     */
    async getUsage(params) {
        // Reject invalid dates up front: NaN comparisons are always false, so they
        // would otherwise slip past the range check and surface later as a
        // misleading "failed to fetch" fallback.
        if (!isValidDate(params.startDate) || !isValidDate(params.endDate)) {
            throw new AppError(ErrorCode.VALIDATION_ERROR, 'Start date and end date must be valid dates');
        }
        // Validate date range
        if (params.startDate >= params.endDate) {
            throw new AppError(ErrorCode.VALIDATION_ERROR, 'Start date must be before end date');
        }
        try {
            await this.initialize();
            logger.info('Fetching real API usage data', {
                projectId: this.projectId,
                startDate: params.startDate.toISOString(),
                endDate: params.endDate.toISOString(),
            });
            // Get usage data from multiple sources
            const [geminiUsage, vertexUsage] = await Promise.all([
                this.getGeminiUsageFromLogs(params),
                this.getVertexUsageFromMonitoring(params),
            ]);
            const allUsage = [...geminiUsage, ...vertexUsage];
            // Apply filters
            let filteredUsage = allUsage;
            if (params.model) {
                filteredUsage = filteredUsage.filter((usage) => usage.model === params.model);
            }
            if (params.project) {
                filteredUsage = filteredUsage.filter((usage) => usage.project === params.project);
            }
            logger.info(`Retrieved ${filteredUsage.length} real usage records`);
            return filteredUsage;
        }
        catch (error) {
            if (error instanceof AppError) {
                throw error;
            }
            // Fallback to existing mock data if real data retrieval fails
            logger.warn('Failed to fetch real usage data, falling back to mock data', {
                error: error instanceof Error ? error.message : 'Unknown error',
                stack: error instanceof Error ? error.stack : undefined,
            });
            return this.generateFallbackData(params);
        }
    }

    /**
     * Queries Cloud Logging for GenerateContent calls in the requested window
     * and converts each entry into a usage record. Token counts come from the
     * response's `usageMetadata` when present, otherwise from a size-based
     * estimate. Any failure is logged and results in an empty array.
     *
     * NOTE(review): this reads `entry.data.protoPayload`; confirm that the
     * logging client exposes the payload under that key for these entries.
     */
    async getGeminiUsageFromLogs(params) {
        try {
            const filter = `
        protoPayload.serviceName="generativelanguage.googleapis.com"
        AND protoPayload.methodName="google.ai.generativelanguage.v1beta.GenerativeService.GenerateContent"
        AND timestamp >= "${params.startDate.toISOString()}"
        AND timestamp <= "${params.endDate.toISOString()}"
      `;
            if (!this.logging) {
                throw new Error('Logging client not initialized');
            }
            const [entries] = await this.logging.getEntries({
                filter: filter.trim(),
                pageSize: 1000,
                orderBy: 'timestamp desc',
            });
            const usage = [];
            for (const entry of entries) {
                try {
                    const logData = entry.data;
                    const metadata = entry.metadata;
                    const timestamp = new Date(metadata.timestamp);
                    const payload = logData.protoPayload;
                    // Extract model information from the request; stop at the first
                    // colon so a later colon in the resource name cannot leak into the model.
                    const modelMatch = payload?.resourceName?.match(/models\/([^:]+):/);
                    const model = modelMatch ? modelMatch[1] : 'gemini-pro';
                    // Try to extract token usage from response or request
                    let inputTokens = 0;
                    let outputTokens = 0;
                    const usageMetadata = payload?.response?.usageMetadata;
                    if (usageMetadata) {
                        inputTokens = usageMetadata.promptTokenCount || 0;
                        outputTokens = usageMetadata.candidatesTokenCount || 0;
                    }
                    else {
                        // Estimate from request/response size (1 token ≈ 4 characters).
                        // Absent payloads count as 0 tokens so the guard below can
                        // actually discard entries that carry no data.
                        inputTokens = estimateTokens(payload?.request);
                        outputTokens = estimateTokens(payload?.response);
                    }
                    if (inputTokens > 0 || outputTokens > 0) {
                        usage.push({
                            id: `real-gemini-${timestamp.getTime()}-${randomSuffix()}`,
                            timestamp,
                            service: 'gemini',
                            model,
                            inputTokens,
                            outputTokens,
                            project: this.projectId,
                            region: payload?.request?.location || 'us-central1',
                        });
                    }
                }
                catch (entryError) {
                    // A single malformed entry must not abort the whole extraction.
                    logger.warn('Failed to parse log entry', {
                        error: entryError instanceof Error ? entryError.message : String(entryError),
                    });
                    continue;
                }
            }
            logger.info(`Extracted ${usage.length} Gemini usage records from logs`);
            return usage;
        }
        catch (error) {
            logger.error('Failed to fetch Gemini usage from logs', {
                error: error instanceof Error ? error.message : String(error),
                stack: error instanceof Error ? error.stack : undefined,
                projectId: this.projectId,
            });
            return [];
        }
    }

    /**
     * Queries Cloud Monitoring for the Vertex AI prediction request count,
     * bucketed per hour and grouped by model, and converts each positive
     * bucket into a usage record with token counts estimated from fixed
     * per-request averages. Any failure is logged and results in an empty array.
     */
    async getVertexUsageFromMonitoring(params) {
        try {
            // Query Cloud Monitoring for Vertex AI API usage
            const request = {
                name: `projects/${this.projectId}`,
                filter: 'metric.type="aiplatform.googleapis.com/prediction/request_count"',
                interval: {
                    startTime: {
                        seconds: Math.floor(params.startDate.getTime() / 1000),
                    },
                    endTime: {
                        seconds: Math.floor(params.endDate.getTime() / 1000),
                    },
                },
                aggregation: {
                    alignmentPeriod: {
                        seconds: 3600, // 1 hour buckets
                    },
                    perSeriesAligner: 1, // ALIGN_DELTA (enum value 1; ALIGN_RATE would be 2)
                    crossSeriesReducer: 4, // REDUCE_SUM
                    groupByFields: ['resource.label.model_id'],
                },
            };
            if (!this.monitoring) {
                throw new Error('Monitoring client not initialized');
            }
            const response = await this.monitoring.listTimeSeries(request);
            const timeSeries = response[0] || [];
            const usage = [];
            // This is an approximation - actual implementation would need more detailed metrics
            const avgInputTokens = 1000; // Average input tokens per request
            const avgOutputTokens = 500; // Average output tokens per request
            for (const series of timeSeries) {
                try {
                    const modelId = series.resource?.labels?.model_id || 'text-bison-001';
                    for (const point of series.points || []) {
                        const startSeconds = point.interval?.startTime?.seconds;
                        // The aligned value may arrive as int64Value or doubleValue
                        // depending on the metric's value type, so accept both.
                        const requestCount = Math.round(readPointValue(point.value));
                        if (!startSeconds || requestCount <= 0) {
                            continue;
                        }
                        const timestamp = new Date(Number(startSeconds) * 1000);
                        usage.push({
                            id: `real-vertex-${timestamp.getTime()}-${randomSuffix()}`,
                            timestamp,
                            service: 'vertex-ai',
                            model: modelId,
                            inputTokens: requestCount * avgInputTokens,
                            outputTokens: requestCount * avgOutputTokens,
                            project: this.projectId,
                            region: 'us-central1',
                        });
                    }
                }
                catch (seriesError) {
                    // A single malformed series must not abort the whole extraction.
                    logger.warn('Failed to parse monitoring series', {
                        error: seriesError instanceof Error ? seriesError.message : String(seriesError),
                    });
                    continue;
                }
            }
            logger.info(`Extracted ${usage.length} Vertex AI usage records from monitoring`);
            return usage;
        }
        catch (error) {
            logger.error('Failed to fetch Vertex usage from monitoring', {
                error: error instanceof Error ? error.message : String(error),
                stack: error instanceof Error ? error.stack : undefined,
                projectId: this.projectId,
            });
            return [];
        }
    }

    // Billing API integration method (future implementation)
    // private async getBillingData(_params: UsageParams): Promise<Usage[]> {
    //   try {
    //     // This would use Cloud Billing API to get actual billing data
    //     // Currently not implemented due to complexity and permissions required
    //     logger.info('Billing API integration not yet implemented');
    //     return [];
    //   } catch (error) {
    //     logger.error('Failed to fetch billing data', { error });
    //     return [];
    //   }
    // }

    /**
     * Generates randomized mock usage records (one Gemini and one Vertex AI
     * record per day, for at most the first five days of the requested range).
     *
     * @param params - same shape as accepted by `getUsage`.
     * @returns array of mock usage records.
     */
    generateFallbackData(params) {
        // Return enhanced mock data when real data is unavailable
        const mockData = [];
        const daysDiff = Math.ceil((params.endDate.getTime() - params.startDate.getTime()) / (1000 * 60 * 60 * 24));
        for (let i = 0; i < Math.min(daysDiff, 5); i++) {
            const date = new Date(params.startDate);
            date.setDate(date.getDate() + i);
            const dayLabel = date.toISOString().split('T')[0];
            // Gemini usage
            mockData.push({
                id: `fallback-gemini-${dayLabel}-${i}`,
                // Each record gets its own Date so mutating one cannot affect the other.
                timestamp: new Date(date),
                service: 'gemini',
                model: params.model || 'gemini-1.5-flash',
                inputTokens: Math.floor(Math.random() * 5000) + 500,
                outputTokens: Math.floor(Math.random() * 2000) + 200,
                project: params.project || this.projectId,
                region: 'us-central1',
            });
            // Vertex AI usage
            mockData.push({
                id: `fallback-vertex-${dayLabel}-${i}`,
                timestamp: new Date(date),
                service: 'vertex-ai',
                model: params.model || 'text-bison-001',
                inputTokens: Math.floor(Math.random() * 4000) + 400,
                outputTokens: Math.floor(Math.random() * 1500) + 150,
                project: params.project || this.projectId,
                region: 'us-central1',
            });
        }
        logger.info(`Generated ${mockData.length} fallback usage records`);
        return mockData;
    }

    /**
     * Initializes the clients and issues one small request against each API.
     * Never throws: failures are logged and reported as `false`.
     *
     * @returns `{ logging: boolean, monitoring: boolean }` indicating which
     *   API calls succeeded.
     */
    async testConnections() {
        const results = { logging: false, monitoring: false };
        try {
            await this.initialize();
            // Test logging connection
            try {
                if (this.logging) {
                    // autoPaginate must be disabled, otherwise pageSize only sets the
                    // page length and every matching entry is still fetched.
                    await this.logging.getEntries({ pageSize: 1, autoPaginate: false });
                    results.logging = true;
                    logger.info('Logging API connection successful');
                }
            }
            catch (error) {
                logger.warn('Logging API connection failed', {
                    error: error instanceof Error ? error.message : String(error),
                });
            }
            // Test monitoring connection
            try {
                if (this.monitoring) {
                    const nowSeconds = Math.floor(Date.now() / 1000);
                    await this.monitoring.listTimeSeries({
                        name: `projects/${this.projectId}`,
                        filter: 'metric.type="compute.googleapis.com/instance/up"',
                        interval: {
                            startTime: { seconds: nowSeconds - 3600 },
                            endTime: { seconds: nowSeconds },
                        },
                        pageSize: 1,
                    }, { autoPaginate: false });
                    results.monitoring = true;
                    logger.info('Monitoring API connection successful');
                }
            }
            catch (error) {
                logger.warn('Monitoring API connection failed', {
                    error: error instanceof Error ? error.message : String(error),
                });
            }
        }
        catch (error) {
            logger.error('Failed to test connections', {
                error: error instanceof Error ? error.message : String(error),
                stack: error instanceof Error ? error.stack : undefined,
            });
        }
        return results;
    }
}
//# sourceMappingURL=realUsageClient.js.map