// ai-functions
// Version:
// Core AI primitives for building intelligent applications
// 513 lines • 18.4 kB
// JavaScript
/**
* Budget Tracking and Request Tracing for AI Functions
*
* Provides:
* - Token counting and estimation
* - Cost tracking by model
* - Budget limits with alerts
* - Request ID generation and tracing
* - User/tenant context isolation
*
* @packageDocumentation
*/
import { AsyncLocalStorage } from 'async_hooks';
import { randomUUID } from 'crypto';
// ============================================================================
// Default Model Pricing (per million tokens, USD)
// ============================================================================
/**
 * Builds one pricing record from its input and output prices
 * (USD per million tokens).
 */
const pricingEntry = (inputPricePerMillion, outputPricePerMillion) => ({
    inputPricePerMillion,
    outputPricePerMillion,
});
/**
 * Built-in price table keyed by model identifier.
 *
 * The `default` entry is the fallback for models that are not listed.
 * NOTE(review): these figures are a hard-coded snapshot; confirm them against
 * the providers' current price lists before relying on exact costs.
 */
const DEFAULT_MODEL_PRICING = {
    // OpenAI models
    'gpt-4o': pricingEntry(2.5, 10),
    'gpt-4o-mini': pricingEntry(0.15, 0.6),
    'gpt-4-turbo': pricingEntry(10, 30),
    'gpt-4': pricingEntry(30, 60),
    'gpt-3.5-turbo': pricingEntry(0.5, 1.5),
    o1: pricingEntry(15, 60),
    'o1-mini': pricingEntry(3, 12),
    'o1-preview': pricingEntry(15, 60),
    'o3-mini': pricingEntry(1.1, 4.4),
    // Anthropic models
    'claude-opus-4-20250514': pricingEntry(15, 75),
    'claude-sonnet-4-20250514': pricingEntry(3, 15),
    'claude-3-5-sonnet-latest': pricingEntry(3, 15),
    'claude-3-5-haiku-latest': pricingEntry(0.25, 1.25),
    'claude-3-opus-20240229': pricingEntry(15, 75),
    'claude-3-sonnet-20240229': pricingEntry(3, 15),
    'claude-3-haiku-20240307': pricingEntry(0.25, 1.25),
    // Google models
    'gemini-2.0-flash': pricingEntry(0.1, 0.4),
    'gemini-1.5-pro': pricingEntry(1.25, 5),
    'gemini-1.5-flash': pricingEntry(0.075, 0.3),
    // Default fallback
    default: pricingEntry(1, 3),
};
/**
 * Token counter for estimating token usage
 *
 * Uses a simple character-based heuristic rather than a real tokenizer, so
 * every result is an approximation. For production, consider integrating
 * tiktoken for more accurate counts.
 */
export class TokenCounter {
    /** Average ASCII characters per token (rough estimate) */
    charsPerToken = 4;
    /** Overhead tokens per message for formatting */
    messageOverhead = 4;
    /**
     * Estimate tokens for a text string.
     *
     * ASCII is counted at `charsPerToken` characters per token and non-ASCII
     * text at two UTF-16 code units per token; each group is rounded up
     * separately.
     *
     * @param {string} text - Text to measure; falsy values yield 0.
     * @param {string} [_model] - Accepted but not used by this estimator.
     * @returns {number} Estimated token count.
     */
    estimateTokens(text, _model) {
        if (!text)
            return 0;
        // Partition the string by UTF-16 code unit so the two groups always
        // add up to text.length. Mixing a code-point count with text.length
        // would count every astral character (e.g. an emoji, which is two
        // code units) once as non-ASCII and once more as ASCII.
        let nonAsciiUnits = 0;
        for (let index = 0; index < text.length; index += 1) {
            if (text.charCodeAt(index) > 127) {
                nonAsciiUnits += 1;
            }
        }
        const asciiUnits = text.length - nonAsciiUnits;
        const asciiTokens = Math.ceil(asciiUnits / this.charsPerToken);
        const nonAsciiTokens = Math.ceil(nonAsciiUnits / 2);
        return asciiTokens + nonAsciiTokens;
    }
    /**
     * Count tokens in a message array including formatting overhead.
     *
     * @param {Array<{role: string, content: string}>} messages - Messages to measure.
     * @param {string} [model] - Forwarded to estimateTokens.
     * @returns {number} Sum over all messages of content tokens, role tokens
     *   and `messageOverhead`.
     */
    countMessageTokens(messages, model) {
        let total = 0;
        for (const message of messages) {
            total += this.estimateTokens(message.content, model);
            // The role label (user, assistant, system) is counted as text too
            total += this.estimateTokens(message.role, model);
            total += this.messageOverhead;
        }
        return total;
    }
}
// ============================================================================
// Budget Exceeded Error
// ============================================================================
/**
 * Error raised when a token or cost budget is (or would be) exceeded.
 *
 * Besides the message it carries the details handed to the constructor:
 * `type`, `limit`, `current` and `requested`.
 */
export class BudgetExceededError extends Error {
    /** Budget kind supplied by the thrower */
    type;
    /** The limit that was crossed */
    limit;
    /** Usage at the time the error was created */
    current;
    /** Amount that was being requested; undefined when not supplied */
    requested;
    /**
     * @param {string} message - Human-readable description.
     * @param {string} type - Budget kind.
     * @param {number} limit - Configured limit.
     * @param {number} current - Current usage.
     * @param {number} [requested] - Requested amount, if any.
     */
    constructor(message, type, limit, current, requested) {
        super(message);
        Object.assign(this, { type, limit, current, requested });
        this.name = 'BudgetExceededError';
    }
}
// ============================================================================
// Budget Tracker
// ============================================================================
/**
 * Tracks token usage and costs with budget limits
 *
 * Usage is accumulated in total and per model. When `maxTokens` / `maxCost`
 * are configured, recording usage throws a BudgetExceededError once a limit
 * is exceeded, and `alertThresholds` together with `onAlert` report when a
 * fraction of a limit has been reached.
 */
export class BudgetTracker {
    /** Sum of input tokens over all recorded usage */
    totalInputTokens = 0;
    /** Sum of output tokens over all recorded usage */
    totalOutputTokens = 0;
    /**
     * Per-model totals: model name -> { inputTokens, outputTokens, cost }.
     *
     * Created without a prototype because model names are caller-supplied
     * strings. On a plain object, a name such as "constructor" or "__proto__"
     * resolves to an inherited Object.prototype member, so usage would be
     * written onto that member and never show up in the totals.
     */
    usageByModel = Object.create(null);
    /**
     * Alert keys that have already fired. Token alerts are stored as the
     * threshold itself, cost alerts as `threshold + 1000`.
     */
    triggeredThresholds = new Set();
    /** Recorded requests, oldest first, capped at `config.maxRequestHistory` */
    requests = [];
    /** Effective configuration: caller options layered over the defaults */
    config;
    /**
     * @param {object} [config] - Options read by this class: `maxTokens`,
     *   `maxCost`, `alertThresholds`, `onAlert`, `customPricing` and
     *   `maxRequestHistory` (defaults to 100).
     */
    constructor(config = {}) {
        this.config = {
            maxRequestHistory: 100,
            ...config,
        };
    }
    /**
     * Record token usage from a request.
     *
     * @param {{inputTokens?: number, outputTokens?: number, model?: string}} usage
     *   Missing token counts are treated as 0; a missing model is booked
     *   under 'default'.
     * @throws {BudgetExceededError} If a configured limit is exceeded after
     *   recording. The usage stays recorded, and alerts are not evaluated
     *   for that call.
     */
    recordUsage(usage) {
        // Default the counts: an undefined count would turn the running totals
        // into NaN, after which no limit comparison could ever trigger.
        const { inputTokens = 0, outputTokens = 0, model = 'default' } = usage;
        this.totalInputTokens += inputTokens;
        this.totalOutputTokens += outputTokens;
        // Track by model
        let modelUsage = this.usageByModel[model];
        if (!modelUsage) {
            modelUsage = { inputTokens: 0, outputTokens: 0, cost: 0 };
            this.usageByModel[model] = modelUsage;
        }
        modelUsage.inputTokens += inputTokens;
        modelUsage.outputTokens += outputTokens;
        // Calculate cost for this usage
        modelUsage.cost += this.calculateCost(inputTokens, outputTokens, this.getPricing(model));
        // Limits are checked first, so a throw here skips the alert pass
        this.checkLimitsAfterRecording();
        this.checkAlerts();
    }
    /**
     * Record a complete request with timing info.
     *
     * Stores a copy of `info` extended with `duration`
     * (`endTime - startTime`), trims the history to `maxRequestHistory`,
     * then records the request's token usage.
     *
     * @param {object} info - Request details; reads `startTime`, `endTime`,
     *   `inputTokens`, `outputTokens` and `model`.
     * @throws {BudgetExceededError} Propagated from recordUsage; the request
     *   is already in the history when that happens.
     */
    recordRequest(info) {
        this.requests.push({
            ...info,
            duration: info.endTime - info.startTime,
        });
        // Trim history if needed, dropping the oldest entries first
        const maxHistory = this.config.maxRequestHistory ?? 100;
        while (this.requests.length > maxHistory) {
            this.requests.shift();
        }
        // Also record the token usage
        this.recordUsage({
            inputTokens: info.inputTokens,
            outputTokens: info.outputTokens,
            model: info.model,
        });
    }
    /**
     * Get all recorded requests.
     *
     * @returns {object[]} A new array holding the stored request records.
     */
    getRequests() {
        return [...this.requests];
    }
    /**
     * Check if a proposed request would exceed budget. Nothing is recorded.
     *
     * @param {{estimatedTokens?: number, model?: string}} [options]
     * @throws {BudgetExceededError} If current usage plus the estimate would
     *   exceed `maxTokens` or `maxCost`.
     */
    checkBudget(options = {}) {
        const { estimatedTokens = 0, model = 'default' } = options;
        // Check token limit
        if (this.config.maxTokens !== undefined) {
            const projectedTotal = this.getTotalTokens() + estimatedTokens;
            if (projectedTotal > this.config.maxTokens) {
                throw new BudgetExceededError(`Token budget exceeded: ${projectedTotal} tokens would exceed limit of ${this.config.maxTokens}`, 'tokens', this.config.maxTokens, this.getTotalTokens(), estimatedTokens);
            }
        }
        // Check cost limit
        if (this.config.maxCost !== undefined) {
            const pricing = this.getPricing(model);
            // Estimate cost assuming half input, half output
            const estimatedCost = this.calculateCost(Math.floor(estimatedTokens / 2), Math.ceil(estimatedTokens / 2), pricing);
            const projectedCost = this.getTotalCost() + estimatedCost;
            if (projectedCost > this.config.maxCost) {
                throw new BudgetExceededError(`Cost budget exceeded: $${projectedCost.toFixed(4)} would exceed limit of $${this.config.maxCost}`, 'cost', this.config.maxCost, this.getTotalCost(), estimatedCost);
            }
        }
    }
    /**
     * Check limits after recording and throw if exceeded.
     *
     * @throws {BudgetExceededError} If the recorded totals exceed `maxTokens`
     *   or `maxCost`; the token limit is checked first.
     */
    checkLimitsAfterRecording() {
        // Check token limit
        if (this.config.maxTokens !== undefined) {
            if (this.getTotalTokens() > this.config.maxTokens) {
                throw new BudgetExceededError(`Token budget exceeded: ${this.getTotalTokens()} tokens exceeds limit of ${this.config.maxTokens}`, 'tokens', this.config.maxTokens, this.getTotalTokens());
            }
        }
        // Check cost limit
        if (this.config.maxCost !== undefined) {
            const currentCost = this.getTotalCost();
            if (currentCost > this.config.maxCost) {
                throw new BudgetExceededError(`Cost budget exceeded: $${currentCost.toFixed(4)} exceeds limit of $${this.config.maxCost}`, 'cost', this.config.maxCost, currentCost);
            }
        }
    }
    /**
     * Check and trigger alerts.
     *
     * Calls `onAlert` once per threshold and budget type, the first time the
     * used fraction of that budget reaches the threshold. Does nothing unless
     * both `alertThresholds` and `onAlert` are configured.
     */
    checkAlerts() {
        if (!this.config.alertThresholds || !this.config.onAlert)
            return;
        // Check token-based alerts
        if (this.config.maxTokens !== undefined) {
            const usage = this.getTotalTokens() / this.config.maxTokens;
            for (const threshold of this.config.alertThresholds) {
                if (usage >= threshold && !this.triggeredThresholds.has(threshold)) {
                    this.triggeredThresholds.add(threshold);
                    this.config.onAlert({
                        threshold,
                        currentUsage: this.getTotalTokens(),
                        limit: this.config.maxTokens,
                        type: 'tokens',
                    });
                }
            }
        }
        // Check cost-based alerts
        if (this.config.maxCost !== undefined) {
            const costUsage = this.getTotalCost() / this.config.maxCost;
            for (const threshold of this.config.alertThresholds) {
                // Offset key so cost alerts do not collide with token alerts in
                // the shared set. The numeric form is kept because these keys
                // are part of exported snapshots.
                const costThresholdKey = threshold + 1000;
                if (costUsage >= threshold && !this.triggeredThresholds.has(costThresholdKey)) {
                    this.triggeredThresholds.add(costThresholdKey);
                    this.config.onAlert({
                        threshold,
                        currentUsage: this.getTotalCost(),
                        limit: this.config.maxCost,
                        type: 'cost',
                    });
                }
            }
        }
    }
    /**
     * Get total input tokens
     * @returns {number}
     */
    getTotalInputTokens() {
        return this.totalInputTokens;
    }
    /**
     * Get total output tokens
     * @returns {number}
     */
    getTotalOutputTokens() {
        return this.totalOutputTokens;
    }
    /**
     * Get total tokens (input + output)
     * @returns {number}
     */
    getTotalTokens() {
        return this.totalInputTokens + this.totalOutputTokens;
    }
    /**
     * Get total cost in USD
     * @returns {number} Sum of the per-model costs.
     */
    getTotalCost() {
        let total = 0;
        for (const usage of Object.values(this.usageByModel)) {
            if (usage) {
                total += usage.cost;
            }
        }
        return total;
    }
    /**
     * Get cost breakdown by model
     * @returns {Object<string, number>} Model name -> accumulated cost.
     */
    getCostByModel() {
        // Object.fromEntries defines own properties, so even a model named
        // "__proto__" becomes a regular key of the result.
        return Object.fromEntries(Object.entries(this.usageByModel)
            .filter(([, usage]) => usage)
            .map(([model, usage]) => [model, usage.cost]));
    }
    /**
     * Get remaining budget
     * @returns {{tokens?: number, cost?: number}} Remaining amount for each
     *   configured limit, never below 0; unconfigured limits are omitted.
     */
    getRemainingBudget() {
        const result = {};
        if (this.config.maxTokens !== undefined) {
            result.tokens = Math.max(0, this.config.maxTokens - this.getTotalTokens());
        }
        if (this.config.maxCost !== undefined) {
            result.cost = Math.max(0, this.config.maxCost - this.getTotalCost());
        }
        return result;
    }
    /**
     * Reset all tracking: totals, per-model usage, fired alerts and request
     * history. Configuration is kept.
     */
    reset() {
        this.totalInputTokens = 0;
        this.totalOutputTokens = 0;
        this.usageByModel = Object.create(null);
        this.triggeredThresholds.clear();
        this.requests = [];
    }
    /**
     * Export current state for persistence.
     *
     * @returns {object} Snapshot with `totalInputTokens`, `totalOutputTokens`,
     *   `totalCost`, `usageByModel` and `triggeredThresholds`. Request
     *   history is not included.
     */
    export() {
        return {
            totalInputTokens: this.totalInputTokens,
            totalOutputTokens: this.totalOutputTokens,
            totalCost: this.getTotalCost(),
            // Copy each per-model record: a shallow copy of the dictionary
            // would leave the snapshot sharing records that later
            // recordUsage calls keep mutating.
            usageByModel: Object.fromEntries(Object.entries(this.usageByModel).map(([model, usage]) => [model, { ...usage }])),
            triggeredThresholds: Array.from(this.triggeredThresholds),
        };
    }
    /**
     * Import previously exported state.
     *
     * Replaces the token totals, per-model usage and fired alert keys.
     * `snapshot.totalCost` is not read; cost is derived from the per-model
     * records.
     *
     * @param {object} snapshot - State in the shape produced by export().
     */
    import(snapshot) {
        this.totalInputTokens = snapshot.totalInputTokens;
        this.totalOutputTokens = snapshot.totalOutputTokens;
        // Rebuild with copied records so recording after an import never
        // mutates the caller's snapshot object.
        const restored = Object.create(null);
        for (const [model, usage] of Object.entries(snapshot.usageByModel ?? {})) {
            if (usage) {
                restored[model] = { ...usage };
            }
        }
        this.usageByModel = restored;
        this.triggeredThresholds = new Set(snapshot.triggeredThresholds);
    }
    /**
     * Get pricing for a model.
     *
     * Lookup order: `config.customPricing`, then the built-in table, then the
     * built-in 'default' entry.
     *
     * @param {string} model - Model identifier.
     * @returns {{inputPricePerMillion: number, outputPricePerMillion: number}}
     */
    getPricing(model) {
        // Own-property checks only: a model named like an Object.prototype
        // member ("constructor", "toString", ...) must not be priced with an
        // inherited function.
        const { customPricing } = this.config;
        if (customPricing && Object.hasOwn(customPricing, model) && customPricing[model]) {
            return customPricing[model];
        }
        if (Object.hasOwn(DEFAULT_MODEL_PRICING, model) && DEFAULT_MODEL_PRICING[model]) {
            return DEFAULT_MODEL_PRICING[model];
        }
        // Fallback to default (always defined)
        return DEFAULT_MODEL_PRICING['default'];
    }
    /**
     * Calculate cost for token usage
     *
     * @param {number} inputTokens
     * @param {number} outputTokens
     * @param {{inputPricePerMillion: number, outputPricePerMillion: number}} pricing
     * @returns {number} Cost in the pricing's currency unit.
     */
    calculateCost(inputTokens, outputTokens, pricing) {
        const inputCost = (inputTokens / 1_000_000) * pricing.inputPricePerMillion;
        const outputCost = (outputTokens / 1_000_000) * pricing.outputPricePerMillion;
        return inputCost + outputCost;
    }
}
// ============================================================================
// Request Context
// ============================================================================
/**
 * Request context for tracing and user isolation
 *
 * Carries a request id, optional user/tenant ids, a link to the parent
 * request, a nesting depth, free-form metadata and the ids used to build a
 * W3C `traceparent` header.
 */
export class RequestContext {
    requestId;
    userId;
    tenantId;
    parentRequestId;
    depth;
    metadata;
    traceId;
    spanId;
    /**
     * @param {object} [options]
     * @param {string} [options.requestId] - Defaults to a random UUID.
     * @param {string} [options.userId]
     * @param {string} [options.tenantId]
     * @param {string} [options.parentRequestId]
     * @param {number} [options.depth] - Defaults to 0.
     * @param {object} [options.metadata]
     * @param {string} [options.traceId] - Trace to join: 32 lowercase hex
     *   characters, not all zeros. Any other value is ignored and a fresh
     *   trace id is generated.
     */
    constructor(options = {}) {
        this.requestId = options.requestId ?? randomUUID();
        if (options.userId !== undefined)
            this.userId = options.userId;
        if (options.tenantId !== undefined)
            this.tenantId = options.tenantId;
        if (options.parentRequestId !== undefined)
            this.parentRequestId = options.parentRequestId;
        this.depth = options.depth ?? 0;
        if (options.metadata !== undefined)
            this.metadata = options.metadata;
        // Trace/span IDs for W3C traceparent. The trace id may be inherited so
        // that related contexts stay in one trace; the span id is always new.
        const inheritedTraceId = typeof options.traceId === 'string' && /^(?!0{32}$)[0-9a-f]{32}$/.test(options.traceId)
            ? options.traceId
            : undefined;
        this.traceId = inheritedTraceId ?? randomUUID().replace(/-/g, '');
        this.spanId = randomUUID().replace(/-/g, '').slice(0, 16);
    }
    /**
     * Create a child context that inherits from this one.
     *
     * The child keeps this context's user id, tenant id and trace id (each
     * overridable through `options`), points `parentRequestId` at this
     * request, merges metadata with the child's keys winning, and is one
     * level deeper. It gets its own request id and span id.
     *
     * @param {object} [options] - Constructor options for the child.
     * @returns {RequestContext}
     */
    createChild(options = {}) {
        // Destructure to separate metadata from other options
        const { metadata: childMetadata, ...restOptions } = options;
        return new RequestContext({
            userId: this.userId,
            tenantId: this.tenantId,
            parentRequestId: this.requestId,
            // Same trace as the parent: a new trace id per child would make
            // parent and child traceparent headers belong to unrelated traces.
            traceId: this.traceId,
            ...restOptions,
            metadata: {
                ...this.metadata,
                ...childMetadata,
            },
            depth: this.depth + 1,
        });
    }
    /**
     * Serialize to trace headers.
     *
     * @returns {Object<string, string>} Always contains `x-request-id`;
     *   `x-user-id`, `x-tenant-id` and `x-parent-request-id` are added only
     *   when the corresponding value is truthy.
     */
    toTraceHeaders() {
        const headers = {
            'x-request-id': this.requestId,
        };
        if (this.userId) {
            headers['x-user-id'] = this.userId;
        }
        if (this.tenantId) {
            headers['x-tenant-id'] = this.tenantId;
        }
        if (this.parentRequestId) {
            headers['x-parent-request-id'] = this.parentRequestId;
        }
        return headers;
    }
    /**
     * Generate W3C traceparent header
     * Format: version-trace_id-parent_id-flags
     *
     * @returns {string} `00-<traceId>-<spanId>-01`
     */
    toTraceparent() {
        const version = '00';
        const flags = '01'; // sampled
        return `${version}-${this.traceId}-${this.spanId}-${flags}`;
    }
    /**
     * Create a RequestContext from trace headers.
     *
     * Reads `x-request-id`, `x-user-id`, `x-tenant-id` and
     * `x-parent-request-id` as plain properties of `headers`; other headers
     * are ignored.
     *
     * @param {Object<string, string>} headers
     * @returns {RequestContext}
     */
    static fromHeaders(headers) {
        const opts = {};
        if (headers['x-request-id'] !== undefined)
            opts.requestId = headers['x-request-id'];
        if (headers['x-user-id'] !== undefined)
            opts.userId = headers['x-user-id'];
        if (headers['x-tenant-id'] !== undefined)
            opts.tenantId = headers['x-tenant-id'];
        if (headers['x-parent-request-id'] !== undefined)
            opts.parentRequestId = headers['x-parent-request-id'];
        return new RequestContext(opts);
    }
}
/**
 * Factory counterpart of `new RequestContext(options)`.
 *
 * @param {object} [options] - Passed through unchanged to the constructor.
 * @returns {RequestContext} The newly constructed context.
 */
export function createRequestContext(options = {}) {
    const context = new RequestContext(options);
    return context;
}
// Tracks the innermost active budget tracker per async call chain.
// A single module-level variable is not enough here: withBudget is async, so
// unrelated calls that overlap in time would see each other's tracker as
// their "parent", and completing out of order would leave a finished tracker
// installed for every later call.
const budgetTrackerStorage = new AsyncLocalStorage();
/**
 * Execute a function with budget tracking
 *
 * Creates a BudgetTracker from `options` (everything except `userId` and
 * `tenantId` is passed on as tracker config) and invokes `fn` with it. When
 * called inside another withBudget callback, usage recorded on the inner
 * tracker is also recorded on every enclosing tracker.
 *
 * @param {object} options - Budget config plus optional `userId` / `tenantId`.
 * @param {(tracker: BudgetTracker, ctx?: RequestContext) => any} fn - Work to
 *   run. `ctx` is a request context when `userId` or `tenantId` is truthy,
 *   otherwise undefined.
 * @returns {Promise<any>} Resolves with the result of `fn`.
 *
 * @example
 * ```ts
 * const result = await withBudget({ maxTokens: 1000 }, async (tracker) => {
 * tracker.recordUsage({ inputTokens: 100, outputTokens: 50 })
 * return 'success'
 * })
 * ```
 */
export async function withBudget(options, fn) {
    const { userId, tenantId, ...budgetConfig } = options;
    const tracker = new BudgetTracker(budgetConfig);
    const ctxOptions = {};
    if (userId !== undefined)
        ctxOptions.userId = userId;
    if (tenantId !== undefined)
        ctxOptions.tenantId = tenantId;
    const ctx = userId || tenantId ? createRequestContext(ctxOptions) : undefined;
    // Enclosing budget of this call chain, if any (undefined at the top level)
    const parentTracker = budgetTrackerStorage.getStore();
    // Create a wrapper tracker that propagates to parent
    const wrappedTracker = new Proxy(tracker, {
        get(target, prop, receiver) {
            // Wrap recordUsage to propagate to parent
            if (prop === 'recordUsage' && parentTracker) {
                return (usage) => {
                    target.recordUsage(usage);
                    // parentTracker is itself a wrapped tracker, so this call
                    // continues up through all enclosing budgets.
                    parentTracker.recordUsage(usage);
                };
            }
            return Reflect.get(target, prop, receiver);
        },
    });
    // The wrapped tracker is visible only to code running inside fn's async
    // call chain; the previous value is restored automatically afterwards.
    return await budgetTrackerStorage.run(wrappedTracker, () => fn(wrappedTracker, ctx));
}
//# sourceMappingURL=budget.js.map