UNPKG

@bernierllc/anthropic-client

Version:

Type-safe Anthropic Claude API client with automatic rate limiting, retry logic, streaming support, and cost tracking

288 lines 11.2 kB
"use strict";
// Compiled CommonJS module. Interop helper for the default-exported Anthropic SDK.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.AnthropicClient = void 0;
const sdk_1 = __importDefault(require("@anthropic-ai/sdk"));
const logger_1 = require("@bernierllc/logger");
const retry_policy_1 = require("@bernierllc/retry-policy");
const types_1 = require("./types");
/**
 * In-process sliding-window rate limiter: at most `maxRequests`
 * acquisitions per `windowMs` milliseconds. State is a list of the
 * timestamps of requests granted inside the current window.
 * NOTE(review): per-process only — not shared across workers.
 */
class SimpleRateLimiter {
    constructor(maxRequests, windowMs) {
        this.maxRequests = maxRequests;
        this.windowMs = windowMs;
        this.requests = []; // ms timestamps of granted acquisitions still inside the window
    }
    /**
     * Try to record a request now.
     * @returns {boolean} true when a slot was available (and is now consumed).
     */
    tryAcquire() {
        const now = Date.now();
        // Drop timestamps that have aged out of the window before counting.
        this.requests = this.requests.filter(time => now - time < this.windowMs);
        if (this.requests.length < this.maxRequests) {
            this.requests.push(now);
            return true;
        }
        return false;
    }
    /**
     * Milliseconds until the oldest in-window request ages out.
     * @returns {number} 0 when the window is empty (or the oldest just expired).
     */
    getWaitTime() {
        if (this.requests.length === 0)
            return 0;
        const oldestRequest = Math.min(...this.requests);
        const waitTime = this.windowMs - (Date.now() - oldestRequest);
        return Math.max(0, waitTime);
    }
    /**
     * Number of acquisitions still available in the current window.
     * @returns {number} remaining slots, never negative.
     */
    getRemainingRequests() {
        const now = Date.now();
        this.requests = this.requests.filter(time => now - time < this.windowMs);
        return Math.max(0, this.maxRequests - this.requests.length);
    }
}
/**
 * Type-safe Anthropic Claude API client with rate limiting, retries,
 * streaming support, and cumulative usage/cost tracking.
 */
class AnthropicClient {
    /**
     * @param {object} config - apiKey (required), maxRetries (default 3),
     *   rateLimit (object | null; null disables limiting; default
     *   {requestsPerMinute: 50, tokensPerMinute: 40000}), defaultModel,
     *   enableLogging (default true).
     * @throws {Error} when config.apiKey is missing.
     */
    constructor(config) {
        this.rateLimiter = null;
        this.stats = { totalRequests: 0, totalInputTokens: 0, totalOutputTokens: 0, totalTokens: 0, totalCost: 0, requestsByModel: {} };
        if (!config.apiKey) {
            throw new Error('API key is required');
        }
        this.config = {
            apiKey: config.apiKey,
            maxRetries: config.maxRetries ?? 3,
            // Explicit null means "no rate limiting"; undefined falls back to defaults.
            rateLimit: config.rateLimit === null ? null : (config.rateLimit ?? { requestsPerMinute: 50, tokensPerMinute: 40000 }),
            defaultModel: config.defaultModel ?? types_1.ClaudeModel.SONNET,
            enableLogging: config.enableLogging ?? true
        };
        this.client = new sdk_1.default({ apiKey: this.config.apiKey });
        // When logging is disabled we still surface ERROR-level events.
        const logLevel = this.config.enableLogging ? logger_1.LogLevel.INFO : logger_1.LogLevel.ERROR;
        this.logger = new logger_1.Logger({
            level: logLevel,
            transports: [new logger_1.ConsoleTransport({ level: logLevel })]
        });
        if (this.config.rateLimit) {
            this.rateLimiter = new SimpleRateLimiter(this.config.rateLimit.requestsPerMinute, 60000);
        }
        this.logger.info('AnthropicClient initialized', { model: this.config.defaultModel, rateLimit: this.config.rateLimit });
    }
    /**
     * Single-shot (non-streaming) completion with retry + rate limiting.
     * @param {string} prompt - user message content.
     * @param {object} [options] - model, maxTokens, systemPrompt, temperature,
     *   topP, topK, stopSequences, metadata.
     * @returns {Promise<object>} {success:true, content, usage, cost, model,
     *   stopReason} on success; {success:false, error} on failure.
     */
    async complete(prompt, options = {}) {
        const model = options.model ?? this.config.defaultModel;
        const maxTokens = options.maxTokens ?? 1024;
        this.logger.debug('Starting completion request', { model, promptLength: prompt.length });
        let attempt = 0;
        let lastError = null;
        while (attempt < this.config.maxRetries) {
            try {
                // Waiting for a rate-limit slot does not consume a retry attempt.
                if (this.rateLimiter && !this.rateLimiter.tryAcquire()) {
                    const waitTime = this.rateLimiter.getWaitTime();
                    this.logger.warn('Rate limit reached, waiting', { waitTime });
                    await this.sleep(waitTime);
                    continue;
                }
                const response = await this.client.messages.create({
                    model,
                    max_tokens: maxTokens,
                    messages: [{ role: 'user', content: prompt }],
                    system: options.systemPrompt,
                    temperature: options.temperature,
                    top_p: options.topP,
                    top_k: options.topK,
                    stop_sequences: options.stopSequences,
                    metadata: options.metadata
                });
                // Concatenate only text blocks; other block types are ignored.
                const content = response.content
                    .filter((block) => block.type === 'text')
                    .map((block) => ('text' in block ? block.text : ''))
                    .join('');
                const inputTokens = response.usage.input_tokens;
                const outputTokens = response.usage.output_tokens;
                const totalTokens = inputTokens + outputTokens;
                const { inputCost, outputCost, totalCost } = this.computeCost(model, inputTokens, outputTokens);
                this.updateStats(model, inputTokens, outputTokens, totalCost);
                this.logger.info('Completion successful', { model, inputTokens, outputTokens, totalCost: totalCost.toFixed(6) });
                return {
                    success: true,
                    content,
                    usage: { inputTokens, outputTokens, totalTokens },
                    cost: { inputCost, outputCost, totalCost },
                    model,
                    stopReason: response.stop_reason || undefined
                };
            }
            catch (error) {
                lastError = error;
                attempt++;
                if (attempt < this.config.maxRetries && (0, retry_policy_1.shouldRetry)(attempt, error)) {
                    const delay = (0, retry_policy_1.calculateRetryDelay)(attempt, { initialDelayMs: 1000, maxDelayMs: 30000, jitter: true });
                    this.logger.warn('Request failed, retrying', { attempt, delay, error: error.message });
                    await this.sleep(delay);
                }
                else {
                    break;
                }
            }
        }
        if (lastError) {
            this.logger.error('Completion failed after retries', lastError, { attempts: attempt });
        }
        return { success: false, error: lastError?.message ?? 'Unknown error' };
    }
    /**
     * Streaming completion; invokes `onChunk` for every text delta.
     * No retry logic (a partially-delivered stream cannot be safely replayed).
     * @param {string} prompt - user message content.
     * @param {(chunk: string) => void | Promise<void>} onChunk - per-delta callback.
     * @param {object} [options] - same knobs as complete() except metadata.
     * @returns {Promise<object>} same result shape as complete().
     */
    async stream(prompt, onChunk, options = {}) {
        const model = options.model ?? this.config.defaultModel;
        const maxTokens = options.maxTokens ?? 1024;
        this.logger.debug('Starting streaming request', { model, promptLength: prompt.length });
        try {
            // FIX: loop until a slot is actually acquired (mirrors complete()).
            // The previous code slept once and then proceeded without calling
            // tryAcquire() again, so the streamed request was never recorded in
            // the limiter window and streaming calls could exceed the rate limit.
            if (this.rateLimiter) {
                while (!this.rateLimiter.tryAcquire()) {
                    const waitTime = this.rateLimiter.getWaitTime();
                    this.logger.warn('Rate limit reached, waiting', { waitTime });
                    await this.sleep(waitTime);
                }
            }
            const stream = await this.client.messages.create({
                model,
                max_tokens: maxTokens,
                messages: [{ role: 'user', content: prompt }],
                system: options.systemPrompt,
                temperature: options.temperature,
                top_p: options.topP,
                top_k: options.topK,
                stop_sequences: options.stopSequences,
                stream: true
            });
            let fullContent = '';
            let inputTokens = 0;
            let outputTokens = 0;
            let stopReason = '';
            for await (const event of stream) {
                if (event.type === 'content_block_delta') {
                    if (event.delta.type === 'text_delta') {
                        const chunk = event.delta.text;
                        fullContent += chunk;
                        await onChunk(chunk);
                    }
                }
                else if (event.type === 'message_start') {
                    // Input token count arrives once, at stream start.
                    inputTokens = event.message.usage.input_tokens;
                }
                else if (event.type === 'message_delta') {
                    // Output token count and stop reason are updated as deltas arrive.
                    outputTokens = event.usage.output_tokens;
                    stopReason = event.delta.stop_reason || '';
                }
            }
            const totalTokens = inputTokens + outputTokens;
            const { inputCost, outputCost, totalCost } = this.computeCost(model, inputTokens, outputTokens);
            this.updateStats(model, inputTokens, outputTokens, totalCost);
            this.logger.info('Streaming completed', { model, inputTokens, outputTokens, totalCost: totalCost.toFixed(6) });
            return {
                success: true,
                content: fullContent,
                usage: { inputTokens, outputTokens, totalTokens },
                cost: { inputCost, outputCost, totalCost },
                model,
                stopReason
            };
        }
        catch (error) {
            this.logger.error('Streaming failed', error);
            return { success: false, error: error.message };
        }
    }
    /**
     * Cumulative usage statistics since construction (or last resetUsage()).
     * @returns {object} a shallow copy; callers cannot mutate internal state.
     */
    getUsage() {
        return { ...this.stats };
    }
    /**
     * Current rate-limit window status, or null when limiting is disabled.
     * tokensRemaining is not tracked by SimpleRateLimiter and is reported as 0.
     * @returns {{requestsRemaining: number, tokensRemaining: number, resetTime: Date} | null}
     */
    getRateLimitStatus() {
        if (!this.rateLimiter) {
            return null;
        }
        const requestsRemaining = this.rateLimiter.getRemainingRequests();
        const resetTime = new Date(Date.now() + this.rateLimiter.getWaitTime());
        return { requestsRemaining, tokensRemaining: 0, resetTime };
    }
    /** Zero out all cumulative usage statistics. */
    resetUsage() {
        this.stats = { totalRequests: 0, totalInputTokens: 0, totalOutputTokens: 0, totalTokens: 0, totalCost: 0, requestsByModel: {} };
        this.logger.info('Usage statistics reset');
    }
    /**
     * Compute per-request USD cost from the MODEL_PRICING table
     * (prices are per million tokens).
     * FIX: guards against a model missing from MODEL_PRICING — previously
     * `pricing.input` threw a TypeError after the API call had already
     * succeeded (and consumed tokens), and complete() then treated that
     * local error as a retryable API failure.
     * @returns {{inputCost: number, outputCost: number, totalCost: number}}
     */
    computeCost(model, inputTokens, outputTokens) {
        const pricing = types_1.MODEL_PRICING[model];
        if (!pricing) {
            this.logger.warn('No pricing entry for model; reporting zero cost', { model });
            return { inputCost: 0, outputCost: 0, totalCost: 0 };
        }
        const inputCost = (inputTokens / 1000000) * pricing.input;
        const outputCost = (outputTokens / 1000000) * pricing.output;
        return { inputCost, outputCost, totalCost: inputCost + outputCost };
    }
    /** Fold one request's token counts and cost into the running totals. */
    updateStats(model, inputTokens, outputTokens, cost) {
        this.stats.totalRequests++;
        this.stats.totalInputTokens += inputTokens;
        this.stats.totalOutputTokens += outputTokens;
        this.stats.totalTokens += inputTokens + outputTokens;
        this.stats.totalCost += cost;
        this.stats.requestsByModel[model] = (this.stats.requestsByModel[model] || 0) + 1;
    }
    /** Promise-based delay helper. */
    sleep(ms) {
        return new Promise((resolve) => setTimeout(resolve, ms));
    }
}
exports.AnthropicClient = AnthropicClient;
//# sourceMappingURL=AnthropicClient.js.map