// modelPricing.js - Pricing for Anthropic models and prompt-cache usage
// Pricing as of Jan 2025 (USD per million tokens); cache writes are billed
// at 1.25x the input rate, cache reads at 0.1x
export const MODEL_PRICING = {
  opus: {
    name: 'Claude 3 Opus',
    input: 15.00,
    output: 75.00,
    cacheWrite: 18.75, // 1.25x input cost to write to the cache
    cacheRead: 1.50 // 10% of input cost to read from the cache
  },
  sonnet: {
    name: 'Claude 3.5 Sonnet',
    input: 3.00,
    output: 15.00,
    cacheWrite: 3.75, // 1.25x input cost
    cacheRead: 0.30 // 10% of input cost
  },
  haiku: {
    name: 'Claude 3.5 Haiku',
    input: 0.80,
    output: 4.00,
    cacheWrite: 1.00,
    cacheRead: 0.08
  }
};
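// Worked example (illustrative, using the Sonnet rates above): writing 10k
// tokens to the cache costs (10_000 / 1e6) * 3.75 = $0.0375, versus $0.03 as
// plain input. Each later read of those tokens then costs $0.003 instead of
// $0.03, so the write premium pays for itself on the first reuse.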
// Cache effectiveness estimates
export const CACHE_PATTERNS = {
// Percentage of input that typically comes from cache after first prompt
averageCacheHitRate: 0.7, // 70% of context is cached on average
// Different scenarios
scenarios: {
firstPrompt: {
cacheHitRate: 0, // No cache on first prompt
cacheWriteRate: 0.9 // 90% of context gets cached
},
subsequentPrompts: {
cacheHitRate: 0.8, // 80% from cache
cacheWriteRate: 0.2 // 20% new content to cache
},
longConversation: {
      cacheHitRate: 0.9, // 90% from cache in long conversations
cacheWriteRate: 0.1 // Only 10% new
}
}
};
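// Back-of-envelope check (illustrative): at the average 70% hit rate, Sonnet's
// effective input price is 0.3 * $3.00 + 0.7 * $0.30 = $1.11 per million
// tokens, roughly 63% below the $3.00 list price, before cache-write premiums.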
/**
* Calculate cost for a task based on tokens and model
* @param {object} params - Cost calculation parameters
* @param {number} params.readTokens - Input/read tokens
* @param {number} params.writeTokens - Output/write tokens
* @param {string} params.model - Model name (opus/sonnet/haiku)
* @param {number} params.prompts - Number of prompts (for cache estimation)
* @param {boolean} params.hasLongContext - Whether task involves long context
* @returns {object} Detailed cost breakdown
*/
export function calculateCost({ readTokens, writeTokens, model = 'sonnet', prompts = 1, hasLongContext = false }) {
const pricing = MODEL_PRICING[model] || MODEL_PRICING.sonnet;
// Estimate cache usage based on number of prompts
let cacheScenario;
if (prompts === 1) {
cacheScenario = CACHE_PATTERNS.scenarios.firstPrompt;
}
else if (prompts > 5 || hasLongContext) {
cacheScenario = CACHE_PATTERNS.scenarios.longConversation;
}
else {
cacheScenario = CACHE_PATTERNS.scenarios.subsequentPrompts;
}
  // Partition read tokens into mutually exclusive billing buckets:
  // cache hits, cache writes, and regular (uncached) input
  const cachedTokens = Math.round(readTokens * cacheScenario.cacheHitRate);
  const cacheWriteTokens = Math.round(readTokens * cacheScenario.cacheWriteRate);
  const nonCachedTokens = Math.max(readTokens - cachedTokens - cacheWriteTokens, 0);
  // Calculate dollar costs (prices are per million tokens, hence the scaling)
const costs = {
// Input costs
nonCachedInput: (nonCachedTokens / 1_000_000) * pricing.input,
cachedInput: (cachedTokens / 1_000_000) * pricing.cacheRead,
cacheCreation: (cacheWriteTokens / 1_000_000) * pricing.cacheWrite,
// Output costs (never cached)
output: (writeTokens / 1_000_000) * pricing.output,
// Totals
totalInput: 0,
totalOutput: 0,
total: 0
};
  // Calculate totals
  costs.totalInput = costs.nonCachedInput + costs.cachedInput + costs.cacheCreation;
  costs.totalOutput = costs.output;
  costs.total = costs.totalInput + costs.totalOutput;
  // Baseline: what the same tokens would cost with no caching at all
  const withoutCache = ((readTokens / 1_000_000) * pricing.input) + costs.output;
return {
model: pricing.name,
tokens: {
read: readTokens,
write: writeTokens,
cached: cachedTokens,
nonCached: nonCachedTokens,
cacheWrite: cacheWriteTokens
},
costs,
breakdown: {
inputBreakdown: [
{ type: 'Non-cached input', tokens: nonCachedTokens, cost: costs.nonCachedInput },
{ type: 'Cached input', tokens: cachedTokens, cost: costs.cachedInput },
{ type: 'Cache creation', tokens: cacheWriteTokens, cost: costs.cacheCreation }
],
outputBreakdown: [
{ type: 'Generated output', tokens: writeTokens, cost: costs.output }
]
},
    savings: {
      withoutCache,
      withCache: costs.total,
      // Can be negative on a first prompt, where cache writes carry a premium
      saved: withoutCache - costs.total
    }
};
}
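// Usage sketch (hypothetical numbers): a three-prompt Sonnet task reading
// 40k tokens and writing 5k.
//
//   const result = calculateCost({
//     readTokens: 40_000,
//     writeTokens: 5_000,
//     model: 'sonnet',
//     prompts: 3
//   });
//   result.costs.total;    // ~$0.11, versus ~$0.20 uncached
//   result.savings.saved;  // ~$0.08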
/**
* Estimate cost for a task before execution
* @param {object} task - Task object
* @param {number} estimatedTokens - Total estimated tokens
* @returns {object} Cost estimate with ranges
*/
export function estimateTaskCost(task, estimatedTokens) {
const model = task.model || 'sonnet';
// Estimate token distribution (typical ratios)
const readRatio = 0.7; // 70% read
const writeRatio = 0.3; // 30% write
const readTokens = Math.round(estimatedTokens * readRatio);
const writeTokens = Math.round(estimatedTokens * writeRatio);
// Estimate prompts based on task complexity
let estimatedPrompts = 1;
if (task.priority === 'critical' || task.category === 'bug') {
estimatedPrompts = 5; // More back-and-forth expected
}
else if (task.category === 'feature' || task.category === 'refactor') {
estimatedPrompts = 3;
}
// Calculate costs for different scenarios
  const bestCase = calculateCost({
    readTokens: Math.round(readTokens * 0.8), // 20% fewer tokens
    writeTokens: Math.round(writeTokens * 0.8),
    model,
    prompts: estimatedPrompts,
    hasLongContext: estimatedTokens > 50000
  });
const likelyCase = calculateCost({
readTokens,
writeTokens,
model,
prompts: estimatedPrompts,
hasLongContext: estimatedTokens > 50000
});
  const worstCase = calculateCost({
    readTokens: Math.round(readTokens * 1.5), // 50% more tokens
    writeTokens: Math.round(writeTokens * 1.5),
    model,
    prompts: estimatedPrompts * 2, // Double the prompts
    hasLongContext: true
  });
return {
model,
estimatedTokens,
estimatedPrompts,
costRange: {
best: bestCase.costs.total,
likely: likelyCase.costs.total,
worst: worstCase.costs.total
},
breakdown: {
best: bestCase,
likely: likelyCase,
worst: worstCase
}
};
}
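// Usage sketch (hypothetical task): a medium feature on Haiku, ~20k tokens.
//
//   const estimate = estimateTaskCost({ category: 'feature', model: 'haiku' }, 20_000);
//   estimate.costRange;  // { best, likely, worst } totals in dollars
//   estimate.breakdown;  // full calculateCost results for each scenario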
// Helper to format cost for display
export function formatCost(dollars) {
  if (dollars < 0.01) {
    return `${(dollars * 100).toFixed(2)}¢`; // sub-cent amounts shown in cents
  }
else if (dollars < 1) {
return `$${dollars.toFixed(3)}`;
}
else {
return `$${dollars.toFixed(2)}`;
}
}
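// Examples: formatCost(0.005) -> "0.50¢", formatCost(0.25) -> "$0.250",
// formatCost(3.5) -> "$3.50"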
export default {
MODEL_PRICING,
CACHE_PATTERNS,
calculateCost,
estimateTaskCost,
formatCost
};