UNPKG

agentlang

Version:

The easiest way to build the most reliable AI agents - enterprise-grade teams of AI agents that collaborate with each other and humans

404 lines (351 loc) 14.6 kB
import { ChatAnthropic } from '@langchain/anthropic';
import { AgentServiceProvider, AIResponse, asAIResponse } from '../provider.js';
import { BaseMessage } from '@langchain/core/messages';
import { getLocalEnv } from '../../auth/defs.js';

export interface AnthropicConfig {
  model?: string;
  temperature?: number;
  maxTokens?: number;
  maxRetries?: number;
  apiKey?: string;
  stream?: boolean;
  clientOptions?: {
    defaultHeaders?: Record<string, string>;
    [key: string]: any;
  };
  /**
   * Enable prompt caching to reuse context across API calls.
   * This reduces latency and costs by caching static portions of prompts.
   * Cache has a 5-minute lifetime by default, refreshed on each use.
   * Minimum cacheable length: 1024 tokens for Claude 3.5+, 2048 for Haiku.
   * Beta header: prompt-caching-2024-07-31
   * @see https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
   */
  enablePromptCaching?: boolean;
  /**
   * Cache control type for prompt caching.
   * Currently only 'ephemeral' is supported with 5-minute TTL.
   * Can be extended to 1-hour with extended-cache-ttl-2025-04-11 beta.
   */
  cacheControl?: 'ephemeral';
  /**
   * Enable extended thinking mode for Claude to show its reasoning process.
   * When enabled, responses include thinking blocks showing Claude's thought process.
   * Requires minimum budgetTokens of 1024 and counts towards maxTokens.
   * NOTE: When thinking is enabled, temperature cannot be customized and will use default.
   * Useful for complex reasoning, problem-solving, and transparency.
   * @see https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
   */
  enableThinking?: boolean;
  /**
   * Token budget for thinking mode (minimum 1024).
   * Determines how many tokens Claude can use for internal reasoning.
   * Larger budgets enable more thorough analysis for complex problems.
   * Must be less than maxTokens.
   */
  budgetTokens?: number;
  /**
   * Enable extended output to generate up to 128,000 tokens in a single response.
   * Useful for long-form content, detailed reports, extensive code generation.
   * Beta header: output-128k-2025-02-19
   * Note: Use streaming to avoid timeouts with long outputs.
   */
  enableExtendedOutput?: boolean;
  /**
   * Enable interleaved thinking to see Claude's reasoning in real-time during streaming.
   * When combined with extended thinking, thinking blocks are streamed alongside content.
   * Provides transparency into Claude's problem-solving process as it happens.
   * Beta header: interleaved-thinking-2025-05-14
   */
  enableInterleavedThinking?: boolean;
  /**
   * Enable fine-grained tool streaming for more responsive tool use.
   * Streams partial JSON updates and character-by-character tool parameters.
   * Improves UI responsiveness when Claude invokes tools.
   * Beta header: fine-grained-tool-streaming-2025-05-14
   */
  enableFineGrainedToolStreaming?: boolean;
}

/** Name of the HTTP header that carries the comma-separated beta feature list. */
const ANTHROPIC_BETA_HEADER = 'anthropic-beta';

/**
 * Config flags that map to a beta header value, in the order the values are
 * appended to the header.
 */
const BETA_FEATURE_FLAGS: ReadonlyArray<[keyof AnthropicConfig, string]> = [
  ['enablePromptCaching', 'prompt-caching-2024-07-31'],
  ['enableExtendedOutput', 'output-128k-2025-02-19'],
  ['enableInterleavedThinking', 'interleaved-thinking-2025-05-14'],
  ['enableFineGrainedToolStreaming', 'fine-grained-tool-streaming-2025-05-14'],
];

/**
 * Returns the value stored under the first key whose entry is neither
 * `undefined` nor `null`, or `undefined` when none of the keys is set.
 * Unlike an `||` chain this keeps explicit falsy values such as `0` or `false`.
 */
function firstDefined(config: Map<string, any>, ...keys: string[]): any {
  for (const key of keys) {
    const value = config.get(key);
    if (value !== undefined && value !== null) {
      return value;
    }
  }
  return undefined;
}

/**
 * Interprets a raw config value as a boolean.
 * Accepts real booleans and the exact strings 'true' / 'false'; anything else
 * yields `fallback`.
 */
function parseBoolean(value: unknown, fallback: boolean | undefined): boolean | undefined {
  if (value === 'true') return true;
  if (value === 'false') return false;
  if (typeof value === 'boolean') return value;
  return fallback;
}

/**
 * Combines a caller-supplied beta header value (if any) with the beta features
 * enabled through config flags, dropping blanks and duplicates.
 */
function mergeBetaHeader(existing: unknown, features: string[]): string {
  const supplied =
    typeof existing === 'string'
      ? existing
          .split(',')
          .map(entry => entry.trim())
          .filter(entry => entry.length > 0)
      : [];
  return Array.from(new Set([...supplied, ...features])).join(',');
}

/**
 * Agent service provider backed by LangChain's `ChatAnthropic` model.
 *
 * The provider resolves its settings from an optional key/value map (camelCase
 * and snake_case keys are both accepted), builds a `ChatAnthropic` instance from
 * them, and rebuilds that instance whenever `updateConfig` is called.
 */
export class AnthropicProvider implements AgentServiceProvider {
  private model: ChatAnthropic;
  private config: AnthropicConfig;

  /**
   * @param config Optional raw settings. When omitted, defaults are used and the
   *   API key is read from `ANTHROPIC_API_KEY` (process env, then local env).
   * @throws Error if thinking is enabled and the thinking budget is not smaller
   *   than `maxTokens`.
   */
  constructor(config?: Map<string, any>) {
    this.config = this.parseConfig(config);
    this.model = new ChatAnthropic(this.buildChatConfig(this.config));
  }

  /**
   * Translates a resolved `AnthropicConfig` into the options object handed to
   * the `ChatAnthropic` constructor.
   *
   * Shared by the constructor and `updateConfig` so both paths apply the same
   * beta headers and thinking validation.
   *
   * @throws Error if thinking is enabled and the effective budget is greater
   *   than or equal to `maxTokens` (8192 is assumed when `maxTokens` is unset).
   */
  private buildChatConfig(config: AnthropicConfig): any {
    const chatConfig: any = {
      model: config.model,
      temperature: config.temperature,
      maxTokens: config.maxTokens,
      maxRetries: config.maxRetries,
      streaming: config.stream,
    };

    if (config.apiKey) {
      chatConfig.apiKey = config.apiKey;
    }
    if (config.clientOptions) {
      chatConfig.clientOptions = config.clientOptions;
    }

    // Configure beta headers based on enabled features.
    const betaFeatures = BETA_FEATURE_FLAGS.filter(([flag]) => Boolean(config[flag])).map(
      ([, headerValue]) => headerValue
    );
    if (betaFeatures.length > 0) {
      const suppliedHeaders = chatConfig.clientOptions?.defaultHeaders;
      // A new clientOptions object is created so the caller's object is never
      // mutated; any beta values the caller already set are kept, not replaced.
      chatConfig.clientOptions = {
        ...chatConfig.clientOptions,
        defaultHeaders: {
          ...suppliedHeaders,
          [ANTHROPIC_BETA_HEADER]: mergeBetaHeader(
            suppliedHeaders?.[ANTHROPIC_BETA_HEADER],
            betaFeatures
          ),
        },
      };
    }

    // Thinking mode is configured on the model itself, not per invoke call.
    if (config.enableThinking) {
      // Validate budget tokens (minimum 1024 required by API).
      const budgetTokens = Math.max(1024, config.budgetTokens || 1024);

      // The thinking budget must leave room inside maxTokens.
      if (budgetTokens >= (config.maxTokens || 8192)) {
        throw new Error(
          `budgetTokens (${budgetTokens}) must be less than maxTokens (${config.maxTokens || 8192})`
        );
      }

      // When thinking is enabled, temperature must not be customized.
      // Anthropic requires using default temperature with thinking mode.
      delete chatConfig.temperature;

      chatConfig.thinking = {
        type: 'enabled',
        budget_tokens: budgetTokens,
      };
    }

    return chatConfig;
  }

  /**
   * Resolves raw key/value settings into an `AnthropicConfig`, applying defaults
   * for anything that is not supplied.
   *
   * Boolean flags accept real booleans or the strings 'true' / 'false'.
   * `maxRetries` honours an explicit `0`; `model`, `maxTokens`, `budgetTokens`
   * and `cacheControl` treat any falsy value as "not set".
   */
  private parseConfig(config?: Map<string, any>): AnthropicConfig {
    const defaultConfig: AnthropicConfig = {
      model: 'claude-sonnet-4-20250514',
      temperature: 0.7,
      maxTokens: 8192,
      maxRetries: 2,
      stream: false,
      enablePromptCaching: false,
      cacheControl: 'ephemeral',
      enableThinking: false,
      budgetTokens: 1024,
      enableExtendedOutput: false,
      enableInterleavedThinking: false,
      enableFineGrainedToolStreaming: false,
    };

    // Evaluated lazily so the environment is only consulted when needed.
    const apiKeyFromEnv = () =>
      process.env.ANTHROPIC_API_KEY || getLocalEnv('ANTHROPIC_API_KEY');

    if (!config) {
      return {
        ...defaultConfig,
        apiKey: apiKeyFromEnv(),
      };
    }

    const apiKey = config.get('apiKey') || config.get('api_key') || apiKeyFromEnv();

    // Reads the first alias that is set and coerces it to a boolean.
    const flag = (fallback: boolean | undefined, ...keys: string[]) =>
      parseBoolean(firstDefined(config, ...keys), fallback);

    return {
      model: config.get('model') || defaultConfig.model,
      temperature: config.get('temperature') ?? defaultConfig.temperature,
      maxTokens: config.get('maxTokens') || config.get('max_tokens') || defaultConfig.maxTokens,
      // 0 is a meaningful value here (no retries), so only nullish values fall through.
      maxRetries: firstDefined(config, 'maxRetries', 'max_retries') ?? defaultConfig.maxRetries,
      stream: flag(defaultConfig.stream, 'stream'),
      enablePromptCaching: flag(
        defaultConfig.enablePromptCaching,
        'enablePromptCaching',
        'enable_prompt_caching'
      ),
      cacheControl:
        config.get('cacheControl') || config.get('cache_control') || defaultConfig.cacheControl,
      enableThinking: flag(
        defaultConfig.enableThinking,
        'enableThinking',
        'enable_thinking',
        'thinking'
      ),
      budgetTokens:
        config.get('budgetTokens') ||
        config.get('budget_tokens') ||
        config.get('thinking_budget') ||
        defaultConfig.budgetTokens,
      enableExtendedOutput: flag(
        defaultConfig.enableExtendedOutput,
        'enableExtendedOutput',
        'enable_extended_output',
        'extendedOutput'
      ),
      enableInterleavedThinking: flag(
        defaultConfig.enableInterleavedThinking,
        'enableInterleavedThinking',
        'enable_interleaved_thinking',
        'interleavedThinking'
      ),
      enableFineGrainedToolStreaming: flag(
        defaultConfig.enableFineGrainedToolStreaming,
        'enableFineGrainedToolStreaming',
        'enable_fine_grained_tool_streaming',
        'fineGrainedToolStreaming'
      ),
      apiKey,
      clientOptions: config.get('clientOptions') || config.get('client_options'),
    };
  }

  /**
   * Sends the messages to the model and converts the reply with `asAIResponse`.
   *
   * @param messages Conversation to send.
   * @param _externalToolSpecs Accepted for interface compatibility; not used here.
   * @throws Error if no API key was resolved.
   */
  async invoke(
    messages: BaseMessage[],
    _externalToolSpecs: any[] | undefined
  ): Promise<AIResponse> {
    if (!this.config.apiKey) {
      throw new Error(
        'Anthropic API key is required. Set ANTHROPIC_API_KEY environment variable or use setLocalEnv("ANTHROPIC_API_KEY", key) or provide apiKey in config.'
      );
    }

    const processedMessages =
      this.config.enablePromptCaching && messages.length > 0
        ? this.applyCacheControl(messages)
        : messages;

    // Thinking configuration lives on the model, so no per-call options are passed.
    return asAIResponse(await this.model.invoke(processedMessages));
  }

  /**
   * Tags the last system message for prompt caching.
   *
   * Scans from the end for the first message whose type is 'system'. If its
   * content is a string longer than 1000 characters (a size heuristic), an
   * ephemeral `cache_control` entry is merged into its `additional_kwargs`.
   * Only that one system message is considered.
   *
   * The returned array is a shallow copy, but the tagged message object is the
   * same instance the caller passed in, so the caller's message is modified.
   *
   * NOTE(review): whether ChatAnthropic forwards `additional_kwargs.cache_control`
   * to the API is not visible from this file; LangChain's examples attach
   * `cache_control` to content blocks instead. Confirm this has an effect.
   */
  private applyCacheControl(messages: BaseMessage[]): BaseMessage[] {
    if (messages.length === 0) return messages;

    const processedMessages = [...messages];

    for (let index = processedMessages.length - 1; index >= 0; index--) {
      const candidate = processedMessages[index] as any;
      if (candidate._getType() !== 'system') {
        continue;
      }

      const content = candidate.content;
      if (typeof content === 'string' && content.length > 1000) {
        candidate.additional_kwargs = {
          ...(candidate.additional_kwargs || {}),
          cache_control: { type: 'ephemeral' },
        };
      }
      // Stop at the last system message whether or not it was large enough.
      break;
    }

    return processedMessages;
  }

  /** Returns a shallow copy of the current configuration. */
  getConfig(): AnthropicConfig {
    return { ...this.config };
  }

  /**
   * Merges `newConfig` over the current configuration and rebuilds the model.
   *
   * The new model is built before any state is replaced, so if validation
   * throws the provider keeps its previous config and model.
   *
   * @throws Error if thinking is enabled and the thinking budget is not smaller
   *   than `maxTokens`.
   */
  updateConfig(newConfig: Partial<AnthropicConfig>): void {
    const mergedConfig: AnthropicConfig = { ...this.config, ...newConfig };
    const rebuiltModel = new ChatAnthropic(this.buildChatConfig(mergedConfig));

    this.config = mergedConfig;
    this.model = rebuiltModel;
  }
}