/**
 * @bernierllc/anthropic-client
 * Type-safe Anthropic Claude API client with automatic rate limiting,
 * retry logic, streaming support, and cost tracking.
 * (Compiled output, 288 lines / 11.2 kB, JavaScript.)
 */
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.AnthropicClient = void 0;
const sdk_1 = __importDefault(require("@anthropic-ai/sdk"));
const logger_1 = require("@bernierllc/logger");
const retry_policy_1 = require("@bernierllc/retry-policy");
const types_1 = require("./types");
/**
 * Minimal sliding-window rate limiter. Records the timestamp of each
 * successful acquisition and permits at most `maxRequests` of them inside
 * any rolling window of `windowMs` milliseconds.
 */
class SimpleRateLimiter {
    constructor(maxRequests, windowMs) {
        this.maxRequests = maxRequests;
        this.windowMs = windowMs;
        this.requests = [];
    }
    /** Drop recorded timestamps that have aged out of the window ending at `now`. */
    prune(now) {
        this.requests = this.requests.filter((stamp) => now - stamp < this.windowMs);
    }
    /**
     * Attempt to reserve a request slot.
     * @returns {boolean} true (and records the current time) when capacity
     *   remains in the window; false (recording nothing) otherwise.
     */
    tryAcquire() {
        const now = Date.now();
        this.prune(now);
        if (this.requests.length >= this.maxRequests) {
            return false;
        }
        this.requests.push(now);
        return true;
    }
    /**
     * Milliseconds until the oldest recorded request leaves the window;
     * 0 when nothing is recorded or the oldest entry has already expired.
     */
    getWaitTime() {
        if (this.requests.length === 0) {
            return 0;
        }
        const oldest = Math.min(...this.requests);
        return Math.max(0, this.windowMs - (Date.now() - oldest));
    }
    /** Number of additional requests currently permitted in the window. */
    getRemainingRequests() {
        this.prune(Date.now());
        return Math.max(0, this.maxRequests - this.requests.length);
    }
}
class AnthropicClient {
constructor(config) {
this.rateLimiter = null;
this.stats = {
totalRequests: 0,
totalInputTokens: 0,
totalOutputTokens: 0,
totalTokens: 0,
totalCost: 0,
requestsByModel: {}
};
if (!config.apiKey) {
throw new Error('API key is required');
}
this.config = {
apiKey: config.apiKey,
maxRetries: config.maxRetries ?? 3,
rateLimit: config.rateLimit === null ? null : (config.rateLimit ?? {
requestsPerMinute: 50,
tokensPerMinute: 40000
}),
defaultModel: config.defaultModel ?? types_1.ClaudeModel.SONNET,
enableLogging: config.enableLogging ?? true
};
this.client = new sdk_1.default({ apiKey: this.config.apiKey });
this.logger = new logger_1.Logger({
level: this.config.enableLogging ? logger_1.LogLevel.INFO : logger_1.LogLevel.ERROR,
transports: [
new logger_1.ConsoleTransport({
level: this.config.enableLogging ? logger_1.LogLevel.INFO : logger_1.LogLevel.ERROR
})
]
});
if (this.config.rateLimit) {
this.rateLimiter = new SimpleRateLimiter(this.config.rateLimit.requestsPerMinute, 60000);
}
this.logger.info('AnthropicClient initialized', {
model: this.config.defaultModel,
rateLimit: this.config.rateLimit
});
}
async complete(prompt, options = {}) {
const model = options.model ?? this.config.defaultModel;
const maxTokens = options.maxTokens ?? 1024;
this.logger.debug('Starting completion request', { model, promptLength: prompt.length });
let attempt = 0;
let lastError = null;
while (attempt < this.config.maxRetries) {
try {
if (this.rateLimiter && !this.rateLimiter.tryAcquire()) {
const waitTime = this.rateLimiter.getWaitTime();
this.logger.warn('Rate limit reached, waiting', { waitTime });
await this.sleep(waitTime);
continue;
}
const response = await this.client.messages.create({
model,
max_tokens: maxTokens,
messages: [{ role: 'user', content: prompt }],
system: options.systemPrompt,
temperature: options.temperature,
top_p: options.topP,
top_k: options.topK,
stop_sequences: options.stopSequences,
metadata: options.metadata
});
const content = response.content
.filter((block) => block.type === 'text')
.map((block) => ('text' in block ? block.text : ''))
.join('');
const inputTokens = response.usage.input_tokens;
const outputTokens = response.usage.output_tokens;
const totalTokens = inputTokens + outputTokens;
const pricing = types_1.MODEL_PRICING[model];
const inputCost = (inputTokens / 1000000) * pricing.input;
const outputCost = (outputTokens / 1000000) * pricing.output;
const totalCost = inputCost + outputCost;
this.updateStats(model, inputTokens, outputTokens, totalCost);
this.logger.info('Completion successful', {
model,
inputTokens,
outputTokens,
totalCost: totalCost.toFixed(6)
});
return {
success: true,
content,
usage: {
inputTokens,
outputTokens,
totalTokens
},
cost: {
inputCost,
outputCost,
totalCost
},
model,
stopReason: response.stop_reason || undefined
};
}
catch (error) {
lastError = error;
attempt++;
if (attempt < this.config.maxRetries && (0, retry_policy_1.shouldRetry)(attempt, error)) {
const delay = (0, retry_policy_1.calculateRetryDelay)(attempt, {
initialDelayMs: 1000,
maxDelayMs: 30000,
jitter: true
});
this.logger.warn('Request failed, retrying', {
attempt,
delay,
error: error.message
});
await this.sleep(delay);
}
else {
break;
}
}
}
if (lastError) {
this.logger.error('Completion failed after retries', lastError, {
attempts: attempt
});
}
return {
success: false,
error: lastError?.message ?? 'Unknown error'
};
}
async stream(prompt, onChunk, options = {}) {
const model = options.model ?? this.config.defaultModel;
const maxTokens = options.maxTokens ?? 1024;
this.logger.debug('Starting streaming request', { model, promptLength: prompt.length });
try {
if (this.rateLimiter && !this.rateLimiter.tryAcquire()) {
const waitTime = this.rateLimiter.getWaitTime();
this.logger.warn('Rate limit reached, waiting', { waitTime });
await this.sleep(waitTime);
}
const stream = await this.client.messages.create({
model,
max_tokens: maxTokens,
messages: [{ role: 'user', content: prompt }],
system: options.systemPrompt,
temperature: options.temperature,
top_p: options.topP,
top_k: options.topK,
stop_sequences: options.stopSequences,
stream: true
});
let fullContent = '';
let inputTokens = 0;
let outputTokens = 0;
let stopReason = '';
for await (const event of stream) {
if (event.type === 'content_block_delta') {
if (event.delta.type === 'text_delta') {
const chunk = event.delta.text;
fullContent += chunk;
await onChunk(chunk);
}
}
else if (event.type === 'message_start') {
inputTokens = event.message.usage.input_tokens;
}
else if (event.type === 'message_delta') {
outputTokens = event.usage.output_tokens;
stopReason = event.delta.stop_reason || '';
}
}
const totalTokens = inputTokens + outputTokens;
const pricing = types_1.MODEL_PRICING[model];
const inputCost = (inputTokens / 1000000) * pricing.input;
const outputCost = (outputTokens / 1000000) * pricing.output;
const totalCost = inputCost + outputCost;
this.updateStats(model, inputTokens, outputTokens, totalCost);
this.logger.info('Streaming completed', {
model,
inputTokens,
outputTokens,
totalCost: totalCost.toFixed(6)
});
return {
success: true,
content: fullContent,
usage: {
inputTokens,
outputTokens,
totalTokens
},
cost: {
inputCost,
outputCost,
totalCost
},
model,
stopReason
};
}
catch (error) {
this.logger.error('Streaming failed', error);
return {
success: false,
error: error.message
};
}
}
getUsage() {
return { ...this.stats };
}
getRateLimitStatus() {
if (!this.rateLimiter) {
return null;
}
const requestsRemaining = this.rateLimiter.getRemainingRequests();
const resetTime = new Date(Date.now() + this.rateLimiter.getWaitTime());
return {
requestsRemaining,
tokensRemaining: 0,
resetTime
};
}
resetUsage() {
this.stats = {
totalRequests: 0,
totalInputTokens: 0,
totalOutputTokens: 0,
totalTokens: 0,
totalCost: 0,
requestsByModel: {}
};
this.logger.info('Usage statistics reset');
}
updateStats(model, inputTokens, outputTokens, cost) {
this.stats.totalRequests++;
this.stats.totalInputTokens += inputTokens;
this.stats.totalOutputTokens += outputTokens;
this.stats.totalTokens += inputTokens + outputTokens;
this.stats.totalCost += cost;
this.stats.requestsByModel[model] = (this.stats.requestsByModel[model] || 0) + 1;
}
sleep(ms) {
return new Promise((resolve) => setTimeout(resolve, ms));
}
}
exports.AnthropicClient = AnthropicClient;
//# sourceMappingURL=AnthropicClient.js.map