UNPKG

mcp-product-manager

Version:

MCP Orchestrator for task and project management with web interface

195 lines 6.86 kB
// modelPricing.js - Pricing tables and cost estimation for Claude models,
// including prompt-cache write/read rates and cache-savings accounting.

// Per-million-token rates in USD. Cache multipliers follow Anthropic's
// published scheme: cacheWrite = 25% of the input rate, cacheRead = 10%.
export const MODEL_PRICING = {
  opus: {
    name: 'Claude 3 Opus',
    input: 15.00,
    output: 75.00,
    cacheWrite: 3.75, // 25% of input cost to create cache
    cacheRead: 1.50   // 10% of input cost to read from cache
  },
  sonnet: {
    name: 'Claude 3.5 Sonnet',
    input: 3.00,
    output: 15.00,
    cacheWrite: 0.75, // 25% of input cost
    cacheRead: 0.30   // 10% of input cost
  },
  haiku: {
    // NOTE(review): these rates match Claude 3 Haiku; Claude 3.5 Haiku
    // launched at different rates — verify against the current pricing page.
    name: 'Claude 3.5 Haiku',
    input: 0.25,
    output: 1.25,
    cacheWrite: 0.0625,
    cacheRead: 0.025
  }
};

// Heuristic cache-effectiveness estimates used when we cannot observe the
// real cache hit rate for a conversation.
export const CACHE_PATTERNS = {
  // Percentage of input that typically comes from cache after first prompt
  averageCacheHitRate: 0.7, // 70% of context is cached on average

  // Different scenarios
  scenarios: {
    firstPrompt: {
      cacheHitRate: 0,    // No cache on first prompt
      cacheWriteRate: 0.9 // 90% of context gets cached
    },
    subsequentPrompts: {
      cacheHitRate: 0.8,  // 80% from cache
      cacheWriteRate: 0.2 // 20% new content to cache
    },
    longConversation: {
      cacheHitRate: 0.9,  // 90% from cache in long convos
      cacheWriteRate: 0.1 // Only 10% new
    }
  }
};

/**
 * Calculate cost for a task based on tokens and model.
 * @param {object} params - Cost calculation parameters
 * @param {number} params.readTokens - Input/read tokens
 * @param {number} params.writeTokens - Output/write tokens
 * @param {string} params.model - Model name (opus/sonnet/haiku); unknown
 *   names fall back to sonnet
 * @param {number} params.prompts - Number of prompts (for cache estimation)
 * @param {boolean} params.hasLongContext - Whether task involves long context
 * @returns {object} Detailed cost breakdown, including `savings.saved`
 *   (may be negative on a first prompt, where cache creation adds cost)
 */
export function calculateCost({
  readTokens,
  writeTokens,
  model = 'sonnet',
  prompts = 1,
  hasLongContext = false
}) {
  // Guarded lookup: a name like 'toString' must not resolve to an
  // inherited Object.prototype member (previous code used a bare [model]).
  const pricing = Object.hasOwn(MODEL_PRICING, model)
    ? MODEL_PRICING[model]
    : MODEL_PRICING.sonnet;

  // Estimate cache usage based on number of prompts
  let cacheScenario;
  if (prompts === 1) {
    cacheScenario = CACHE_PATTERNS.scenarios.firstPrompt;
  } else if (prompts > 5 || hasLongContext) {
    cacheScenario = CACHE_PATTERNS.scenarios.longConversation;
  } else {
    cacheScenario = CACHE_PATTERNS.scenarios.subsequentPrompts;
  }

  // Break down read tokens into cached vs non-cached
  const cachedTokens = Math.round(readTokens * cacheScenario.cacheHitRate);
  const nonCachedTokens = readTokens - cachedTokens;
  const cacheWriteTokens = Math.round(readTokens * cacheScenario.cacheWriteRate);

  // Calculate costs (rates are per million tokens)
  const costs = {
    // Input costs
    nonCachedInput: (nonCachedTokens / 1_000_000) * pricing.input,
    cachedInput: (cachedTokens / 1_000_000) * pricing.cacheRead,
    cacheCreation: (cacheWriteTokens / 1_000_000) * pricing.cacheWrite,

    // Output costs (never cached)
    output: (writeTokens / 1_000_000) * pricing.output,

    // Totals
    totalInput: 0,
    totalOutput: 0,
    total: 0
  };

  // Calculate totals
  costs.totalInput = costs.nonCachedInput + costs.cachedInput + costs.cacheCreation;
  costs.totalOutput = costs.output;
  costs.total = costs.totalInput + costs.totalOutput;

  // What the same traffic would cost with every read token at the full
  // input rate (i.e. no cache at all).
  const withoutCache = ((readTokens / 1_000_000) * pricing.input) + costs.output;

  return {
    model: pricing.name,
    tokens: {
      read: readTokens,
      write: writeTokens,
      cached: cachedTokens,
      nonCached: nonCachedTokens,
      cacheWrite: cacheWriteTokens
    },
    costs,
    breakdown: {
      inputBreakdown: [
        { type: 'Non-cached input', tokens: nonCachedTokens, cost: costs.nonCachedInput },
        { type: 'Cached input', tokens: cachedTokens, cost: costs.cachedInput },
        { type: 'Cache creation', tokens: cacheWriteTokens, cost: costs.cacheCreation }
      ],
      outputBreakdown: [
        { type: 'Generated output', tokens: writeTokens, cost: costs.output }
      ]
    },
    savings: {
      withoutCache,
      withCache: costs.total,
      // BUG FIX: was hard-coded to 0. Negative means caching cost more
      // than it saved (typical for a first prompt, which pays cacheWrite).
      saved: withoutCache - costs.total
    }
  };
}

/**
 * Estimate cost for a task before execution.
 * @param {object} task - Task object (reads `model`, `priority`, `category`)
 * @param {number} estimatedTokens - Total estimated tokens
 * @returns {object} Cost estimate with best/likely/worst ranges
 */
export function estimateTaskCost(task, estimatedTokens) {
  const model = task.model || 'sonnet';

  // Estimate token distribution (typical ratios)
  const readRatio = 0.7;  // 70% read
  const writeRatio = 0.3; // 30% write
  const readTokens = Math.round(estimatedTokens * readRatio);
  const writeTokens = Math.round(estimatedTokens * writeRatio);

  // Estimate prompts based on task complexity
  let estimatedPrompts = 1;
  if (task.priority === 'critical' || task.category === 'bug') {
    estimatedPrompts = 5; // More back-and-forth expected
  } else if (task.category === 'feature' || task.category === 'refactor') {
    estimatedPrompts = 3;
  }

  // Calculate costs for different scenarios. Scaled token counts are
  // rounded — BUG FIX: fractional token counts were previously passed in.
  const bestCase = calculateCost({
    readTokens: Math.round(readTokens * 0.8), // 20% fewer tokens
    writeTokens: Math.round(writeTokens * 0.8),
    model,
    prompts: estimatedPrompts,
    hasLongContext: estimatedTokens > 50000
  });

  const likelyCase = calculateCost({
    readTokens,
    writeTokens,
    model,
    prompts: estimatedPrompts,
    hasLongContext: estimatedTokens > 50000
  });

  const worstCase = calculateCost({
    readTokens: Math.round(readTokens * 1.5), // 50% more tokens
    writeTokens: Math.round(writeTokens * 1.5),
    model,
    prompts: estimatedPrompts * 2, // Double the prompts
    hasLongContext: true
  });

  return {
    model,
    estimatedTokens,
    estimatedPrompts,
    costRange: {
      best: bestCase.costs.total,
      likely: likelyCase.costs.total,
      worst: worstCase.costs.total
    },
    breakdown: {
      best: bestCase,
      likely: likelyCase,
      worst: worstCase
    }
  };
}

/**
 * Format a dollar amount for display.
 * Sub-cent amounts are shown in cents, small amounts with 3 decimals,
 * otherwise standard 2-decimal dollars.
 * @param {number} dollars - Amount in USD
 * @returns {string} Human-readable cost string
 */
export function formatCost(dollars) {
  if (dollars < 0.01) {
    // BUG FIX: previously rendered both symbols, e.g. "$0.500¢".
    return `${(dollars * 100).toFixed(3)}¢`;
  } else if (dollars < 1) {
    return `$${dollars.toFixed(3)}`;
  } else {
    return `$${dollars.toFixed(2)}`;
  }
}

export default {
  MODEL_PRICING,
  CACHE_PATTERNS,
  calculateCost,
  estimateTaskCost,
  formatCost
};