// ai-functions
// Version:
// Core AI primitives for building intelligent applications
// 801 lines (650 loc) • 24.3 kB
// text/typescript
/**
* Tests for Budget Tracking and Request Tracing
*
* TDD: RED phase - Write failing tests first
*
* Features tested:
* 1. Token counting per request
* 2. Budget limits (reject when exceeded)
* 3. Request ID generation and propagation
* 4. User/tenant context isolation
* 5. Cost tracking by model
*/
import { describe, it, expect, beforeEach, vi } from 'vitest'
import {
BudgetTracker,
TokenCounter,
RequestContext,
withBudget,
createRequestContext,
BudgetExceededError,
type BudgetConfig,
type TokenUsage,
type RequestInfo,
type BudgetAlert,
} from '../src/budget.js'
import { configure, resetContext, withContext } from '../src/context.js'
// ============================================================================
// Token Counting Tests
// ============================================================================
/**
 * TokenCounter suite: token estimation for plain strings (optionally per
 * model) and for chat-style message arrays.
 */
describe('TokenCounter', () => {
  // Every test gets its own freshly constructed counter.
  let counter: TokenCounter

  beforeEach(() => {
    counter = new TokenCounter()
  })

  describe('estimateTokens', () => {
    it('estimates tokens for a simple string', () => {
      const estimate = counter.estimateTokens('Hello, world!')

      // The suite assumes roughly 4 characters per token for English text,
      // so a 13-character greeting must land in the single digits.
      expect(estimate).toBeGreaterThan(0)
      expect(estimate).toBeLessThan(10)
    })

    it('estimates tokens for longer text', () => {
      const paragraph = 'The quick brown fox jumps over the lazy dog. '.repeat(10)
      const estimate = counter.estimateTokens(paragraph)

      // Bounds are loose on purpose: the estimate only has to scale with length.
      expect(estimate).toBeGreaterThan(50)
      expect(estimate).toBeLessThan(200)
    })

    it('handles empty string', () => {
      const estimate = counter.estimateTokens('')
      expect(estimate).toBe(0)
    })

    it('handles unicode and special characters', () => {
      const estimate = counter.estimateTokens('Hello! こんにちは 你好 🌍')
      expect(estimate).toBeGreaterThan(0)
    })
  })

  describe('model-specific estimation', () => {
    it('adjusts estimation for different models', () => {
      const sample = 'Hello, world!'
      const forGpt = counter.estimateTokens(sample, 'gpt-4o')
      const forClaude = counter.estimateTokens(sample, 'claude-sonnet-4-20250514')

      // Only sanity-checks that each model id yields a positive estimate.
      expect(forGpt).toBeGreaterThan(0)
      expect(forClaude).toBeGreaterThan(0)
    })
  })

  describe('countMessageTokens', () => {
    // Builds a new two-message conversation on every call so tests never share arrays.
    const conversation = () => [
      { role: 'system', content: 'You are a helpful assistant.' },
      { role: 'user', content: 'Hello!' },
    ]

    it('counts tokens in a message array', () => {
      const total = counter.countMessageTokens(conversation())
      expect(total).toBeGreaterThan(5)
    })

    it('includes overhead for message formatting', () => {
      const bareText = counter.estimateTokens('You are a helpful assistant. Hello!')
      const framed = counter.countMessageTokens(conversation())

      // Counting the messages must never yield less than the concatenated text alone.
      expect(framed).toBeGreaterThanOrEqual(bareText)
    })
  })
})
// ============================================================================
// Budget Tracker Tests
// ============================================================================
/**
 * BudgetTracker suite: cumulative token/cost accounting, limit checks,
 * threshold alerts, and export/import of usage snapshots.
 */
describe('BudgetTracker', () => {
  // Unlimited tracker shared by the tests that need no budget configuration.
  let tracker: BudgetTracker

  beforeEach(() => {
    tracker = new BudgetTracker()
  })

  describe('token tracking', () => {
    // Records two requests: 100 in / 50 out, then 150 in / 75 out.
    const recordTwoRequests = (): void => {
      tracker.recordUsage({ inputTokens: 100, outputTokens: 50 })
      tracker.recordUsage({ inputTokens: 150, outputTokens: 75 })
    }

    it('tracks cumulative input tokens', () => {
      recordTwoRequests()
      expect(tracker.getTotalInputTokens()).toBe(250)
    })

    it('tracks cumulative output tokens', () => {
      recordTwoRequests()
      expect(tracker.getTotalOutputTokens()).toBe(125)
    })

    it('tracks total tokens (input + output)', () => {
      recordTwoRequests()
      expect(tracker.getTotalTokens()).toBe(375)
    })

    it('starts with zero tokens', () => {
      expect(tracker.getTotalTokens()).toBe(0)
      expect(tracker.getTotalInputTokens()).toBe(0)
      expect(tracker.getTotalOutputTokens()).toBe(0)
    })
  })

  describe('cost tracking', () => {
    it('calculates cost based on model pricing', () => {
      tracker.recordUsage({ inputTokens: 1000, outputTokens: 500, model: 'gpt-4o' })

      expect(tracker.getTotalCost()).toBeGreaterThan(0)
    })

    it('tracks cost by model', () => {
      tracker.recordUsage({ inputTokens: 1000, outputTokens: 500, model: 'gpt-4o' })
      tracker.recordUsage({
        inputTokens: 1000,
        outputTokens: 500,
        model: 'claude-sonnet-4-20250514',
      })

      const perModel = tracker.getCostByModel()
      expect(perModel['gpt-4o']).toBeGreaterThan(0)
      expect(perModel['claude-sonnet-4-20250514']).toBeGreaterThan(0)
    })

    it('uses default model pricing when not specified', () => {
      // No model given: the tracker is expected to price the usage anyway.
      tracker.recordUsage({ inputTokens: 1000, outputTokens: 500 })

      expect(tracker.getTotalCost()).toBeGreaterThan(0)
    })
  })

  describe('budget limits', () => {
    it('enforces token limits', () => {
      const capped = new BudgetTracker({ maxTokens: 500 })
      capped.recordUsage({ inputTokens: 200, outputTokens: 100 })

      // 300 tokens are recorded; checking a further 300 goes past the 500 cap.
      const check = () => capped.checkBudget({ estimatedTokens: 300 })
      expect(check).toThrow(BudgetExceededError)
    })

    it('enforces cost limits', () => {
      // Cap spending at $0.10.
      const capped = new BudgetTracker({ maxCost: 0.1 })

      // Assuming GPT-4o at $2.5/1M input and $10/1M output:
      // 10k input = $0.025, 5k output = $0.05, so $0.075 is already spent.
      capped.recordUsage({ inputTokens: 10000, outputTokens: 5000, model: 'gpt-4o' })

      // 100k more tokens at ~$6/1M average is ~$0.60, far above the ~$0.025 left.
      const check = () => capped.checkBudget({ estimatedTokens: 100000, model: 'gpt-4o' })
      expect(check).toThrow(BudgetExceededError)
    })

    it('allows usage within limits', () => {
      const capped = new BudgetTracker({ maxTokens: 1000 })
      capped.recordUsage({ inputTokens: 200, outputTokens: 100 })

      // 300 recorded + 100 estimated stays under the 1000 cap.
      const check = () => capped.checkBudget({ estimatedTokens: 100 })
      expect(check).not.toThrow()
    })

    it('provides remaining budget info', () => {
      const capped = new BudgetTracker({ maxTokens: 1000, maxCost: 1.0 })
      capped.recordUsage({ inputTokens: 200, outputTokens: 100 })

      const left = capped.getRemainingBudget()
      expect(left.tokens).toBe(700)
      expect(left.cost).toBeLessThan(1.0)
    })
  })

  describe('budget alerts', () => {
    it('triggers alert at 50% threshold', () => {
      const onAlert = vi.fn()
      const capped = new BudgetTracker({
        maxTokens: 1000,
        alertThresholds: [0.5],
        onAlert,
      })

      // 500 of 1000 tokens: exactly the 50% mark.
      capped.recordUsage({ inputTokens: 400, outputTokens: 100 })

      const expectedAlert = expect.objectContaining({
        threshold: 0.5,
        currentUsage: expect.any(Number),
        limit: 1000,
      })
      expect(onAlert).toHaveBeenCalledWith(expectedAlert)
    })

    it('triggers multiple alerts at different thresholds', () => {
      const onAlert = vi.fn()
      const capped = new BudgetTracker({
        maxTokens: 1000,
        alertThresholds: [0.5, 0.8, 1.0],
        onAlert,
      })

      // Running total 500 -> crosses 50%.
      capped.recordUsage({ inputTokens: 400, outputTokens: 100 })
      expect(onAlert).toHaveBeenCalledTimes(1)

      // Running total 800 -> crosses 80%.
      capped.recordUsage({ inputTokens: 200, outputTokens: 100 })
      expect(onAlert).toHaveBeenCalledTimes(2)

      // Running total 1000 -> reaches 100%.
      capped.recordUsage({ inputTokens: 150, outputTokens: 50 })
      expect(onAlert).toHaveBeenCalledTimes(3)
    })

    it('does not re-trigger same threshold', () => {
      const onAlert = vi.fn()
      const capped = new BudgetTracker({
        maxTokens: 1000,
        alertThresholds: [0.5],
        onAlert,
      })

      capped.recordUsage({ inputTokens: 400, outputTokens: 100 })
      capped.recordUsage({ inputTokens: 50, outputTokens: 25 })

      // Usage is still above 50% after the second call, yet only one alert is expected.
      expect(onAlert).toHaveBeenCalledTimes(1)
    })
  })

  describe('reset and persistence', () => {
    it('resets token counts', () => {
      tracker.recordUsage({ inputTokens: 100, outputTokens: 50 })

      tracker.reset()

      expect(tracker.getTotalTokens()).toBe(0)
    })

    it('exports usage data for persistence', () => {
      tracker.recordUsage({ inputTokens: 100, outputTokens: 50, model: 'gpt-4o' })
      tracker.recordUsage({
        inputTokens: 200,
        outputTokens: 100,
        model: 'claude-sonnet-4-20250514',
      })

      const exported = tracker.export()

      expect(exported.totalInputTokens).toBe(300)
      expect(exported.totalOutputTokens).toBe(150)
      expect(exported.usageByModel).toHaveProperty('gpt-4o')
      expect(exported.usageByModel).toHaveProperty('claude-sonnet-4-20250514')
    })

    it('imports previously exported data', () => {
      // Hand-built snapshot with the same field names the export test reads back.
      const saved = {
        totalInputTokens: 500,
        totalOutputTokens: 250,
        totalCost: 0.05,
        usageByModel: {
          'gpt-4o': { inputTokens: 500, outputTokens: 250, cost: 0.05 },
        },
        triggeredThresholds: [0.5],
      }

      tracker.import(saved)

      expect(tracker.getTotalInputTokens()).toBe(500)
      expect(tracker.getTotalOutputTokens()).toBe(250)
    })
  })
})
// ============================================================================
// Request Context Tests
// ============================================================================
/**
 * RequestContext suite: request ID generation, user/tenant fields,
 * parent-child linkage, trace header round-tripping, and metadata.
 */
describe('RequestContext', () => {
  describe('request ID generation', () => {
    it('generates unique request IDs', () => {
      const first = createRequestContext()
      const second = createRequestContext()

      expect(first.requestId).toBeDefined()
      expect(second.requestId).toBeDefined()
      expect(first.requestId).not.toBe(second.requestId)
    })

    it('generates IDs with expected format', () => {
      const { requestId } = createRequestContext()

      // Alphanumerics and dashes only (UUID-like), longer than 8 characters.
      expect(requestId).toMatch(/^[a-z0-9-]+$/i)
      expect(requestId.length).toBeGreaterThan(8)
    })

    it('accepts custom request ID', () => {
      const context = createRequestContext({ requestId: 'custom-123' })
      expect(context.requestId).toBe('custom-123')
    })
  })

  describe('user context', () => {
    it('stores user ID', () => {
      const context = createRequestContext({ userId: 'user-456' })
      expect(context.userId).toBe('user-456')
    })

    it('stores tenant ID', () => {
      const context = createRequestContext({ tenantId: 'tenant-789' })
      expect(context.tenantId).toBe('tenant-789')
    })

    it('stores both user and tenant', () => {
      const context = createRequestContext({ userId: 'user-456', tenantId: 'tenant-789' })

      expect(context.userId).toBe('user-456')
      expect(context.tenantId).toBe('tenant-789')
    })
  })

  describe('parent-child relationships', () => {
    it('tracks parent request ID', () => {
      const parent = createRequestContext()
      const child = createRequestContext({ parentRequestId: parent.requestId })

      expect(child.parentRequestId).toBe(parent.requestId)
    })

    it('creates child context from parent', () => {
      const parent = createRequestContext({ userId: 'user-123' })
      const child = parent.createChild()

      // The child links back to the parent, keeps the user, and gets its own ID.
      expect(child.parentRequestId).toBe(parent.requestId)
      expect(child.userId).toBe('user-123')
      expect(child.requestId).not.toBe(parent.requestId)
    })

    it('allows depth tracking', () => {
      const root = createRequestContext()
      const level1 = root.createChild()
      const level2 = level1.createChild()

      expect(root.depth).toBe(0)
      expect(level1.depth).toBe(1)
      expect(level2.depth).toBe(2)
    })
  })

  describe('trace context', () => {
    it('serializes to trace headers', () => {
      const context = createRequestContext({ userId: 'user-123', tenantId: 'tenant-456' })

      const outgoing = context.toTraceHeaders()

      expect(outgoing['x-request-id']).toBe(context.requestId)
      expect(outgoing['x-user-id']).toBe('user-123')
      expect(outgoing['x-tenant-id']).toBe('tenant-456')
    })

    it('deserializes from trace headers', () => {
      const incoming = {
        'x-request-id': 'req-789',
        'x-user-id': 'user-123',
        'x-tenant-id': 'tenant-456',
        'x-parent-request-id': 'parent-123',
      }

      const context = RequestContext.fromHeaders(incoming)

      expect(context.requestId).toBe('req-789')
      expect(context.userId).toBe('user-123')
      expect(context.tenantId).toBe('tenant-456')
      expect(context.parentRequestId).toBe('parent-123')
    })

    it('generates W3C traceparent header', () => {
      const header = createRequestContext().toTraceparent()

      // Shape: "00" version, 32-hex trace id, 16-hex parent id, 2-hex flags.
      expect(header).toMatch(/^00-[a-f0-9]{32}-[a-f0-9]{16}-[a-f0-9]{2}$/)
    })
  })

  describe('metadata', () => {
    it('stores custom metadata', () => {
      const context = createRequestContext({
        metadata: { feature: 'chat', environment: 'production' },
      })

      expect(context.metadata?.feature).toBe('chat')
      expect(context.metadata?.environment).toBe('production')
    })

    it('merges metadata in child contexts', () => {
      const parent = createRequestContext({ metadata: { feature: 'chat' } })
      const child = parent.createChild({ metadata: { action: 'summarize' } })

      // The child is expected to expose both the parent's key and its own.
      expect(child.metadata?.feature).toBe('chat')
      expect(child.metadata?.action).toBe('summarize')
    })
  })
})
// ============================================================================
// withBudget Tests
// ============================================================================
/**
 * withBudget suite: runs a callback with a BudgetTracker (and optional
 * request context) and surfaces BudgetExceededError as a rejection.
 *
 * Tests whose assertions live inside the callback declare
 * `expect.assertions(n)` so they fail, rather than pass vacuously, if the
 * callback is never invoked.
 */
describe('withBudget', () => {
  beforeEach(() => {
    resetContext()
  })

  it('executes function within budget context', async () => {
    const result = await withBudget({ maxTokens: 1000 }, async (tracker) => {
      tracker.recordUsage({ inputTokens: 100, outputTokens: 50 })
      return 'success'
    })

    // The callback's return value is what withBudget resolves to.
    expect(result).toBe('success')
  })

  it('provides budget tracker to callback', async () => {
    // All three checks run inside the callback; require that they executed.
    expect.assertions(3)

    await withBudget({ maxTokens: 1000 }, async (tracker) => {
      expect(tracker).toBeInstanceOf(BudgetTracker)
      expect(typeof tracker.recordUsage).toBe('function')
      expect(typeof tracker.getTotalTokens).toBe('function')
    })
  })

  it('throws when budget exceeded', async () => {
    await expect(
      withBudget({ maxTokens: 100 }, async (tracker) => {
        // 200 tokens against a 100-token cap.
        tracker.recordUsage({ inputTokens: 150, outputTokens: 50 })
        return 'should not reach'
      })
    ).rejects.toThrow(BudgetExceededError)
  })

  it('supports cost-based limits', async () => {
    await expect(
      withBudget({ maxCost: 0.001 }, async (tracker) => {
        // Record usage that exceeds the cost limit.
        tracker.recordUsage({
          inputTokens: 100000,
          outputTokens: 50000,
          model: 'gpt-4o',
        })
        return 'should not reach'
      })
    ).rejects.toThrow(BudgetExceededError)
  })

  it('nests budget contexts correctly', async () => {
    // One check in the inner callback, one in the outer; both must execute.
    expect.assertions(2)

    await withBudget({ maxTokens: 1000 }, async (outerTracker) => {
      outerTracker.recordUsage({ inputTokens: 100, outputTokens: 50 })

      await withBudget({ maxTokens: 500 }, async (innerTracker) => {
        innerTracker.recordUsage({ inputTokens: 50, outputTokens: 25 })
        // The inner tracker only sees its own 75 tokens.
        expect(innerTracker.getTotalTokens()).toBe(75)
      })

      // The outer tracker is expected to include the inner usage: 150 + 75.
      expect(outerTracker.getTotalTokens()).toBe(225)
    })
  })

  it('includes request context when provided', async () => {
    // Both checks run inside the callback; require that they executed.
    expect.assertions(2)

    await withBudget(
      {
        maxTokens: 1000,
        userId: 'user-123',
        tenantId: 'tenant-456',
      },
      // The tracker argument is not needed here; only the context is inspected.
      async (_tracker, ctx) => {
        expect(ctx?.userId).toBe('user-123')
        expect(ctx?.tenantId).toBe('tenant-456')
      }
    )
  })
})
// ============================================================================
// User/Tenant Context Isolation Tests
// ============================================================================
/**
 * Isolation suite: separate BudgetTracker instances keep separate totals,
 * and a single shared tracker enforces one combined limit.
 */
describe('User/Tenant Budget Isolation', () => {
  it('tracks budget per user', async () => {
    const trackers = new Map<string, BudgetTracker>()

    // Returns the user's tracker, creating a 1000-token one on first use.
    const trackerFor = (userId: string): BudgetTracker => {
      const existing = trackers.get(userId)
      if (existing) {
        return existing
      }
      const created = new BudgetTracker({ maxTokens: 1000 })
      trackers.set(userId, created)
      return created
    }

    // First user spends 450 tokens.
    const first = trackerFor('user-1')
    first.recordUsage({ inputTokens: 300, outputTokens: 150 })

    // Second user spends 150 tokens.
    const second = trackerFor('user-2')
    second.recordUsage({ inputTokens: 100, outputTokens: 50 })

    // Totals must not bleed between the two users.
    expect(first.getTotalTokens()).toBe(450)
    expect(second.getTotalTokens()).toBe(150)
  })

  it('enforces per-tenant limits', async () => {
    const shared = new BudgetTracker({ maxTokens: 500 })

    // Two requests from the same tenant: 300 + 150 = 450 tokens used.
    shared.recordUsage({ inputTokens: 200, outputTokens: 100 })
    shared.recordUsage({ inputTokens: 100, outputTokens: 50 })

    // A third request estimated at 200 tokens would pass the 500 cap.
    const thirdRequest = () => shared.checkBudget({ estimatedTokens: 200 })
    expect(thirdRequest).toThrow(BudgetExceededError)
  })
})
// ============================================================================
// Cost Tracking by Model Tests
// ============================================================================
/**
 * Cost suite: per-model pricing, aggregation across models, and custom
 * pricing tables. Prices quoted in comments are the values this suite
 * expects the tracker's pricing table to contain.
 */
describe('Cost Tracking by Model', () => {
  it('uses correct pricing for GPT-4o', () => {
    const tracker = new BudgetTracker()
    tracker.recordUsage({
      inputTokens: 1000000, // 1M tokens
      outputTokens: 500000,
      model: 'gpt-4o',
    })

    const cost = tracker.getTotalCost()

    // GPT-4o pricing: $2.50/1M input, $10/1M output (as of 2024)
    // Expected: $2.50 + $5 = $7.50
    expect(cost).toBeCloseTo(7.5, 1)
  })

  it('uses correct pricing for Claude Sonnet', () => {
    const tracker = new BudgetTracker()
    tracker.recordUsage({
      inputTokens: 1000000,
      outputTokens: 500000,
      model: 'claude-sonnet-4-20250514',
    })

    const cost = tracker.getTotalCost()

    // Claude Sonnet pricing: $3/1M input, $15/1M output
    // Expected: $3 + $7.5 = $10.5
    expect(cost).toBeCloseTo(10.5, 1)
  })

  it('uses correct pricing for Claude Haiku', () => {
    const tracker = new BudgetTracker()
    tracker.recordUsage({
      inputTokens: 1000000,
      outputTokens: 500000,
      model: 'claude-3-5-haiku-latest',
    })

    const cost = tracker.getTotalCost()

    // Claude Haiku pricing: $0.25/1M input, $1.25/1M output
    // Expected: $0.25 + $0.625 = $0.875
    // NOTE(review): $0.25 / $1.25 looks like the Claude 3 Haiku rate, while the
    // model id here is claude-3-5-haiku — confirm which rate the pricing table
    // is meant to map this id to before relying on this expectation.
    expect(cost).toBeCloseTo(0.875, 2)
  })

  it('aggregates costs across multiple models', () => {
    const tracker = new BudgetTracker()
    tracker.recordUsage({ inputTokens: 500000, outputTokens: 250000, model: 'gpt-4o' })
    tracker.recordUsage({
      inputTokens: 500000,
      outputTokens: 250000,
      model: 'claude-sonnet-4-20250514',
    })

    const costByModel = tracker.getCostByModel()
    expect(Object.keys(costByModel)).toContain('gpt-4o')
    expect(Object.keys(costByModel)).toContain('claude-sonnet-4-20250514')

    // Compare with a tolerance: the total and the per-model figures may be
    // accumulated in different orders, and floating-point addition is not
    // guaranteed to give bit-identical results for both.
    const summed = Object.values(costByModel).reduce((sum, cost) => sum + cost, 0)
    expect(tracker.getTotalCost()).toBeCloseTo(summed, 10)
  })

  it('supports custom pricing tables', () => {
    const tracker = new BudgetTracker({
      customPricing: {
        'my-custom-model': {
          inputPricePerMillion: 1.0,
          outputPricePerMillion: 2.0,
        },
      },
    })
    tracker.recordUsage({
      inputTokens: 1000000,
      outputTokens: 500000,
      model: 'my-custom-model',
    })

    const cost = tracker.getTotalCost()

    // Custom pricing: $1/1M input, $2/1M output
    // Expected: $1 + $1 = $2
    expect(cost).toBeCloseTo(2.0, 2)
  })
})
// ============================================================================
// Context Integration Tests
// ============================================================================
/**
 * Integration suite: budget configuration and request contexts passed
 * through the execution-context helpers (`configure`, `withContext`).
 *
 * Each test asserts that `withContext` invokes the supplied callback, so it
 * cannot pass without the callback running. How the budget or request
 * context is read back inside the callback is not exercised here.
 */
describe('Context Integration', () => {
  beforeEach(() => {
    resetContext()
  })

  it('integrates budget tracking with execution context', async () => {
    configure({
      budget: {
        maxTokens: 10000,
        maxCost: 1.0,
      },
    })
    const body = vi.fn(async () => {})

    try {
      await withContext({}, body)
    } finally {
      // Presumably configure() changes shared state (the beforeEach reset
      // suggests so); undo it so later suites do not inherit this budget.
      resetContext()
    }

    expect(body).toHaveBeenCalledTimes(1)
  })

  it('propagates request context through withContext', async () => {
    const ctx = createRequestContext({ userId: 'test-user' })
    const body = vi.fn(async () => {})

    await withContext({ requestContext: ctx }, body)

    expect(body).toHaveBeenCalledTimes(1)
  })
})
// ============================================================================
// Request Info and Logging Tests
// ============================================================================
/**
 * Request history suite: recording per-request info with timing, reading
 * the history back, and capping how many entries are retained.
 */
describe('Request Info', () => {
  it('records request info with timing', () => {
    const tracker = new BudgetTracker()
    // A request that started about 500 ms before it ended.
    const info: RequestInfo = {
      requestId: 'req-123',
      model: 'gpt-4o',
      startTime: Date.now() - 500,
      endTime: Date.now(),
      inputTokens: 100,
      outputTokens: 50,
    }

    tracker.recordRequest(info)

    const history = tracker.getRequests()
    expect(history).toHaveLength(1)
    expect(history[0].requestId).toBe('req-123')
    // Precision -2 tolerates the small gap between the two Date.now() reads.
    expect(history[0].duration).toBeCloseTo(500, -2)
  })

  it('provides request history for debugging', () => {
    const tracker = new BudgetTracker()

    // Two back-to-back requests against different models.
    tracker.recordRequest({
      requestId: 'req-1',
      model: 'gpt-4o',
      startTime: Date.now() - 1000,
      endTime: Date.now() - 500,
      inputTokens: 100,
      outputTokens: 50,
    })
    tracker.recordRequest({
      requestId: 'req-2',
      model: 'claude-sonnet-4-20250514',
      startTime: Date.now() - 500,
      endTime: Date.now(),
      inputTokens: 200,
      outputTokens: 100,
    })

    expect(tracker.getRequests()).toHaveLength(2)
  })

  it('limits request history size', () => {
    const tracker = new BudgetTracker({ maxRequestHistory: 5 })

    // Record req-0 through req-9, twice as many as the configured cap.
    for (const index of Array.from({ length: 10 }, (_, i) => i)) {
      tracker.recordRequest({
        requestId: `req-${index}`,
        model: 'gpt-4o',
        startTime: Date.now(),
        endTime: Date.now(),
        inputTokens: 100,
        outputTokens: 50,
      })
    }

    const history = tracker.getRequests()
    expect(history.length).toBeLessThanOrEqual(5)

    // The newest entry must be the one that survives at the tail.
    const newest = history[history.length - 1]
    expect(newest.requestId).toBe('req-9')
  })
})