UNPKG

@llumiverse/drivers

Version:

LLM driver implementations. Currently supported are: openai, huggingface, bedrock, replicate.

github.com/vertesia/llumiverse

vertesia/llumiverse

737 lines • 32.9 kB

JavaScript

"use strict";
/**
 * Shared utilities for Anthropic SDK-based drivers.
 *
 * Used by both the native AnthropicDriver (drivers/src/anthropic/) and the
 * VertexAI Claude pathway (drivers/src/vertexai/models/claude.ts). Both use
 * the same Anthropic Messages API surface — the only difference is the client
 * (Anthropic vs AnthropicVertex) and how auth is wired up.
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.anthropicUsageToTokenUsage = anthropicUsageToTokenUsage;
exports.claudeFinishReason = claudeFinishReason;
exports.collectClaudeTools = collectClaudeTools;
exports.collectAllTextContent = collectAllTextContent;
exports.claudeMaxTokens = claudeMaxTokens;
exports.formatClaudePrompt = formatClaudePrompt;
exports.createPromptFromResponse = createPromptFromResponse;
exports.mergeConsecutiveUserMessages = mergeConsecutiveUserMessages;
exports.sanitizeMessages = sanitizeMessages;
exports.fixOrphanedToolUse = fixOrphanedToolUse;
exports.updateClaudeConversation = updateClaudeConversation;
exports.claudeMessagesContainToolBlocks = claudeMessagesContainToolBlocks;
exports.convertClaudeToolBlocksToText = convertClaudeToolBlocksToText;
exports.getClaudePayload = getClaudePayload;
exports.buildClaudeStreamingConversation = buildClaudeStreamingConversation;
exports.executeClaudeCompletion = executeClaudeCompletion;
exports.streamClaudeCompletion = streamClaudeCompletion;
exports.formatAnthropicLlumiverseError = formatAnthropicLlumiverseError;
exports.isClaudeErrorRetryable = isClaudeErrorRetryable;
const error_1 = require("@anthropic-ai/sdk/error");
const common_1 = require("@llumiverse/common");
const core_1 = require("@llumiverse/core");
const async_1 = require("@llumiverse/core/async");
const claude_thinking_js_1 = require("./claude-thinking.js");
// ============================================================================
// Token usage
// ============================================================================
/**
 * Convert an Anthropic `usage` object into the token usage shape returned by this module.
 *
 * Cache read and cache creation tokens are added to `prompt` and `total`;
 * `prompt_new` is the plain `input_tokens` value.
 *
 * @param usage - Object with `input_tokens`, `output_tokens` and optional
 *   `cache_read_input_tokens` / `cache_creation_input_tokens`.
 * @returns Token usage with `prompt_new`, `prompt`, `result`, `total`,
 *   `prompt_cached` and `prompt_cache_write`.
 */
function anthropicUsageToTokenUsage(usage) {
    const cacheRead = usage.cache_read_input_tokens ?? 0;
    const cacheWrite = usage.cache_creation_input_tokens ?? 0;
    return {
        prompt_new: usage.input_tokens,
        prompt: usage.input_tokens + cacheRead + cacheWrite,
        result: usage.output_tokens,
        total: usage.input_tokens + usage.output_tokens + cacheRead + cacheWrite,
        // Normalise null to undefined so absent cache counters are simply omitted.
        prompt_cached: usage.cache_read_input_tokens ?? undefined,
        prompt_cache_write: usage.cache_creation_input_tokens ?? undefined,
    };
}
// ============================================================================
// Finish reason
// ============================================================================
/**
 * Map a Claude stop reason to the finish reason reported by this module.
 *
 * @param reason - Stop reason string; falsy values yield `undefined`.
 * @returns `'stop'` for `end_turn`, `'length'` for `max_tokens`, otherwise the
 *   reason unchanged.
 */
function claudeFinishReason(reason) {
    if (!reason)
        return undefined;
    switch (reason) {
        case 'end_turn':
            return 'stop';
        case 'max_tokens':
            return 'length';
        default:
            return reason; // stop_sequence, tool_use
    }
}
// ============================================================================
// Content extraction
// ============================================================================
/**
 * Collect every `tool_use` block from a content array.
 *
 * @param content - Iterable of content blocks.
 * @returns Array of `{ id, tool_name, tool_input }`, or `undefined` when the
 *   content holds no `tool_use` block.
 */
function collectClaudeTools(content) {
    const out = [];
    for (const block of content) {
        if (block.type === 'tool_use') {
            out.push({
                id: block.id,
                tool_name: block.name,
                tool_input: block.input,
            });
        }
    }
    return out.length > 0 ? out : undefined;
}
/**
 * Join the text of a content array into a single newline-separated string.
 *
 * @param content - Iterable of content blocks.
 * @param includeThoughts - When true, `thinking` / `redacted_thinking` blocks
 *   are emitted first, followed by an empty line, then the regular text blocks.
 * @returns The joined text (empty string when nothing qualifies).
 */
function collectAllTextContent(content, includeThoughts = false) {
    const textParts = [];
    if (includeThoughts) {
        for (const block of content) {
            if (block.type === 'thinking' && block.thinking) {
                textParts.push(block.thinking);
            }
            else if (block.type === 'redacted_thinking' && block.data) {
                textParts.push(`[Redacted thinking: ${block.data}]`);
            }
        }
        // An empty part yields a blank line between thoughts and the answer text.
        if (textParts.length > 0) {
            textParts.push('');
        }
    }
    for (const block of content) {
        if (block.type === 'text' && block.text) {
            textParts.push(block.text);
        }
    }
    return textParts.join('\n');
}
// ============================================================================
// Max tokens
// ============================================================================
/**
 * Resolve the `max_tokens` value for a request.
 *
 * @param option - Execution options; reads `model` and `model_options`.
 * @returns `model_options.max_tokens` when it is a number, otherwise the model
 *   limit from `getClaudeMaxTokensLimit`, capped at 64000 for
 *   `claude-3-7-sonnet` models whose thinking budget is below 48000.
 */
function claudeMaxTokens(option) {
    const modelOptions = option.model_options;
    if (modelOptions && typeof modelOptions.max_tokens === 'number') {
        return modelOptions.max_tokens;
    }
    let maxSupportedTokens = (0, common_1.getClaudeMaxTokensLimit)(option.model);
    // Claude 3.7 supports up to 128k with a beta header; stay at 64k unless a
    // thinking budget of at least 48000 tokens is requested.
    if (option.model.includes('claude-3-7-sonnet') && (modelOptions?.thinking_budget_tokens ?? 0) < 48000) {
        maxSupportedTokens = 64000;
    }
    return maxSupportedTokens;
}
/**
 * Turn the files attached to a prompt segment into Claude content blocks.
 *
 * Images become base64 `image` blocks. Unless `restrictedTypes` is true, PDFs
 * become base64 `document` blocks and `text/*` files become plain-text
 * `document` blocks. Files of any other type are skipped.
 *
 * @param segment - Prompt segment; reads `segment.files`.
 * @param restrictedTypes - When true only images are emitted.
 * @returns Promise resolving to the array of content blocks.
 * @throws Error when an image has a MIME type other than png/jpeg/gif/webp.
 */
async function collectFileBlocks(segment, restrictedTypes = false) {
    const contentBlocks = [];
    for (const file of segment.files || []) {
        if (file.mime_type?.startsWith('image/')) {
            const allowedTypes = ['image/png', 'image/jpeg', 'image/gif', 'image/webp'];
            if (!allowedTypes.includes(file.mime_type)) {
                throw new Error(`Unsupported image type: ${file.mime_type}`);
            }
            const mimeType = String(file.mime_type);
            contentBlocks.push({
                type: 'image',
                source: {
                    type: 'base64',
                    data: await (0, core_1.readStreamAsBase64)(await file.getStream()),
                    media_type: mimeType,
                },
            });
        }
        else if (!restrictedTypes) {
            if (file.mime_type === 'application/pdf') {
                contentBlocks.push({
                    title: file.name,
                    type: 'document',
                    source: {
                        type: 'base64',
                        data: await (0, core_1.readStreamAsBase64)(await file.getStream()),
                        media_type: 'application/pdf',
                    },
                });
            }
            else if (file.mime_type?.startsWith('text/')) {
                contentBlocks.push({
                    title: file.name,
                    type: 'document',
                    source: {
                        type: 'text',
                        data: await (0, core_1.readStreamAsString)(await file.getStream()),
                        media_type: 'text/plain',
                    },
                });
            }
        }
    }
    return contentBlocks;
}
// ============================================================================
// Prompt formatting (PromptSegment[] → ClaudePrompt)
// ============================================================================
/**
 * Convert prompt segments into a Claude prompt (`{ messages, system }`).
 *
 * System segments become system text blocks; when `options.result_schema` is
 * set, a JSON-schema instruction is appended to the system blocks. Tool
 * segments become user messages holding a `tool_result` block. Safety segments
 * are appended after all other messages. Non-tool segments with no content and
 * no usable files are skipped.
 *
 * @param segments - Prompt segments to convert.
 * @param options - Reads `result_schema` and `tools`.
 * @returns Promise of `{ messages, system }`; `system` is `undefined` when empty.
 * @throws Error when a tool segment has no `tool_use_id`.
 */
async function formatClaudePrompt(segments, options) {
    let system = segments
        .filter((s) => s.role === core_1.PromptRole.system)
        .map((s) => ({ text: s.content, type: 'text' }));
    if (options.result_schema) {
        const schemaText = options.tools && options.tools.length > 0
            ? 'When not calling tools, the answer must be a JSON object using the following JSON Schema:\n' + JSON.stringify(options.result_schema)
            : 'The answer must be a JSON object using the following JSON Schema:\n' + JSON.stringify(options.result_schema);
        system.push({ text: schemaText, type: 'text' });
    }
    let messages = [];
    const safetyMessages = [];
    for (const segment of segments) {
        if (segment.role === core_1.PromptRole.system)
            continue;
        if (segment.role === core_1.PromptRole.tool) {
            if (!segment.tool_use_id) {
                throw new Error('Tool prompt segment must have a tool use ID');
            }
            const contentBlocks = [];
            if (segment.content) {
                contentBlocks.push({ type: 'text', text: segment.content });
            }
            // Tool results only carry images (restricted file types).
            contentBlocks.push(...(await collectFileBlocks(segment, true)));
            messages.push({
                role: 'user',
                content: [{
                        type: 'tool_result',
                        tool_use_id: segment.tool_use_id,
                        content: contentBlocks,
                    }],
            });
        }
        else {
            const contentBlocks = [];
            if (segment.content) {
                contentBlocks.push({ type: 'text', text: segment.content });
            }
            contentBlocks.push(...(await collectFileBlocks(segment, false)));
            if (contentBlocks.length === 0)
                continue;
            const messageParam = {
                role: segment.role === core_1.PromptRole.assistant ? 'assistant' : 'user',
                content: contentBlocks,
            };
            if (segment.role === core_1.PromptRole.safety) {
                safetyMessages.push(messageParam);
            }
            else {
                messages.push(messageParam);
            }
        }
    }
    // Safety messages always go last.
    messages = messages.concat(safetyMessages);
    if (system && system.length === 0)
        system = undefined;
    return { messages, system };
}
// ============================================================================
// Conversation management
// ============================================================================
/**
 * Wrap a model response as a single-message prompt with no system blocks.
 *
 * @param response - Object with `role` and `content`.
 * @returns `{ messages: [{ role, content }], system: undefined }`.
 */
function createPromptFromResponse(response) {
    return {
        messages: [{ role: response.role, content: response.content }],
        system: undefined,
    };
}
/**
 * Merge runs of consecutive `user` messages into one message each.
 *
 * String content is converted to a text block; array content is concatenated.
 *
 * @param messages - Message list (not modified).
 * @returns The same array when nothing needs merging, otherwise a new array;
 *   an empty input yields a new empty array.
 */
function mergeConsecutiveUserMessages(messages) {
    if (messages.length === 0)
        return [];
    const needsMerging = messages.some((msg, i) => i < messages.length - 1 && msg.role === 'user' && messages[i + 1].role === 'user');
    if (!needsMerging)
        return messages;
    const result = [];
    let i = 0;
    while (i < messages.length) {
        const current = messages[i];
        if (current.role === 'user') {
            const mergedContent = [];
            while (i < messages.length && messages[i].role === 'user') {
                const userMsg = messages[i];
                if (Array.isArray(userMsg.content)) {
                    mergedContent.push(...userMsg.content);
                }
                else if (typeof userMsg.content === 'string') {
                    mergedContent.push({ type: 'text', text: userMsg.content });
                }
                i++;
            }
            result.push({ role: 'user', content: mergedContent });
        }
        else {
            result.push(current);
            i++;
        }
    }
    return result;
}
/**
 * Remove empty text from a message list.
 *
 * String messages that are blank are dropped. For array content, blank text
 * blocks are removed and messages left with no blocks are dropped. Messages
 * whose content is neither a string nor an array are dropped as empty rather
 * than raising a TypeError.
 *
 * @param messages - Message list (not modified).
 * @returns A new array of the surviving messages.
 */
function sanitizeMessages(messages) {
    const result = [];
    for (const message of messages) {
        if (typeof message.content === 'string') {
            if (message.content.trim())
                result.push(message);
            continue;
        }
        // Nothing to keep when content is missing or of an unexpected shape.
        if (!Array.isArray(message.content))
            continue;
        const filteredContent = message.content.filter((block) => {
            if (block.type === 'text')
                return block.text && block.text.trim().length > 0;
            return true;
        });
        if (filteredContent.length > 0) {
            result.push({ ...message, content: filteredContent });
        }
    }
    return result;
}
/**
 * Build the synthetic `tool_result` block used for a `tool_use` that never got a result.
 */
function createInterruptedToolResult(block) {
    return {
        type: 'tool_result',
        tool_use_id: block.id,
        content: `[Tool interrupted: The user stopped the operation before "${block.name}" could execute.]`,
    };
}
/**
 * Ensure every assistant `tool_use` that is directly followed by a user
 * message has a matching `tool_result` in that user message.
 *
 * Missing results are filled in with synthetic "Tool interrupted" blocks placed
 * before the user message's own content. Assistant messages that are last, or
 * followed by a non-user message, are left as they are.
 *
 * @param messages - Message list. The caller's array is not modified.
 * @returns The input array itself when it has fewer than two messages,
 *   otherwise a new array with patched user messages where needed.
 */
function fixOrphanedToolUse(messages) {
    if (messages.length < 2)
        return messages;
    // Work on a copy: the patched follow-up message is written ahead of the
    // cursor, and that must not leak into the caller's array.
    const pending = [...messages];
    const result = [];
    for (let i = 0; i < pending.length; i++) {
        const current = pending[i];
        result.push(current);
        if (current.role !== 'assistant' || !Array.isArray(current.content))
            continue;
        const toolUseBlocks = current.content.filter((block) => block.type === 'tool_use');
        if (toolUseBlocks.length === 0)
            continue;
        const nextMessage = pending[i + 1];
        if (!nextMessage || nextMessage.role !== 'user')
            continue;
        if (Array.isArray(nextMessage.content)) {
            const toolResultIds = new Set(nextMessage.content
                .filter((block) => block.type === 'tool_result')
                .map((block) => block.tool_use_id));
            const orphaned = toolUseBlocks.filter((block) => !toolResultIds.has(block.id));
            if (orphaned.length > 0) {
                const syntheticResults = orphaned.map((block) => createInterruptedToolResult(block));
                pending[i + 1] = { ...nextMessage, content: [...syntheticResults, ...nextMessage.content] };
            }
        }
        else {
            // Non-array content cannot contain any tool_result, so all tool uses are orphaned.
            const syntheticResults = toolUseBlocks.map((block) => createInterruptedToolResult(block));
            const textContent = typeof nextMessage.content === 'string'
                ? { type: 'text', text: nextMessage.content }
                : { type: 'text', text: '' };
            pending[i + 1] = { role: 'user', content: [...syntheticResults, textContent] };
        }
    }
    return result;
}
/**
 * Append a prompt to an existing conversation.
 *
 * System blocks are concatenated; messages are concatenated, sanitized with
 * `sanitizeMessages`, then consecutive user messages are merged.
 *
 * @param conversation - Existing conversation or `undefined`/`null`.
 * @param prompt - Prompt with optional `messages` and `system`.
 * @returns `{ messages, system }`; `system` is `undefined` when empty.
 */
function updateClaudeConversation(conversation, prompt) {
    const baseSystemMessages = conversation?.system || [];
    const baseMessages = conversation?.messages || [];
    const system = baseSystemMessages.concat(prompt.system || []);
    const combined = sanitizeMessages(baseMessages.concat(prompt.messages || []));
    const mergedMessages = mergeConsecutiveUserMessages(combined);
    return {
        messages: mergedMessages,
        system: system.length > 0 ? system : undefined,
    };
}
/**
 * Tell whether a content entry is a `tool_use` or `tool_result` block object.
 */
function isClaudeToolBlock(block) {
    return typeof block === 'object' && block !== null && 'type' in block
        && (block.type === 'tool_use' || block.type === 'tool_result');
}
/**
 * Shorten a string to 500 characters plus `...` for inline tool summaries.
 */
function truncateToolText(text) {
    return text.length > 500 ? text.substring(0, 500) + '...' : text;
}
/**
 * Check whether any message with array content holds a `tool_use` or `tool_result` block.
 *
 * @param messages - Message list.
 * @returns `true` when at least one tool block is present.
 */
function claudeMessagesContainToolBlocks(messages) {
    for (const msg of messages) {
        if (!Array.isArray(msg.content))
            continue;
        if (msg.content.some(isClaudeToolBlock))
            return true;
    }
    return false;
}
/**
 * Replace tool blocks with plain text summaries.
 *
 * `tool_use` becomes `[Tool call: name(input)]` and `tool_result` becomes
 * `[Tool result: ...]`, each truncated to 500 characters per text piece.
 * Messages without tool blocks are returned unchanged (same object).
 *
 * @param messages - Message list (not modified).
 * @returns A new array of messages.
 */
function convertClaudeToolBlocksToText(messages) {
    return messages.map((msg) => {
        if (!Array.isArray(msg.content))
            return msg;
        if (!msg.content.some(isClaudeToolBlock))
            return msg;
        const newContent = [];
        for (const block of msg.content) {
            if (typeof block === 'string') {
                newContent.push(block);
                continue;
            }
            if (block.type === 'tool_use') {
                const inputStr = block.input ? JSON.stringify(block.input) : '';
                newContent.push({ type: 'text', text: `[Tool call: ${block.name}(${truncateToolText(inputStr)})]` });
            }
            else if (block.type === 'tool_result') {
                let resultStr = 'No content';
                if (typeof block.content === 'string') {
                    resultStr = truncateToolText(block.content);
                }
                else if (Array.isArray(block.content)) {
                    // Only string text is summarised; a text block without a
                    // string `text` is ignored instead of throwing.
                    const texts = block.content
                        .filter((c) => c.type === 'text' && typeof c.text === 'string')
                        .map((c) => truncateToolText(c.text));
                    resultStr = texts.join('\n') || 'No text content';
                }
                newContent.push({ type: 'text', text: `[Tool result: ${resultStr}]` });
            }
            else {
                newContent.push(block);
            }
        }
        return { ...msg, content: newContent };
    });
}
// ============================================================================
// Cache control stripping
// ============================================================================
/**
 * Return a copy of the block without `cache_control`, or the block itself when
 * it has none (so the returned object may still be shared with the caller).
 */
function stripClaudeCacheControlFromBlock(block) {
    if (typeof block === 'object' && block !== null && 'cache_control' in block) {
        const { cache_control: _cc, ...rest } = block;
        return rest;
    }
    return block;
}
/**
 * Strip `cache_control` from every block of every message with array content.
 * Messages with array content are returned as new objects with new content arrays.
 */
function stripClaudeCacheControlFromMessages(messages) {
    return messages.map((msg) => {
        if (!Array.isArray(msg.content))
            return msg;
        return { ...msg, content: msg.content.map(stripClaudeCacheControlFromBlock) };
    });
}
/**
 * Strip `cache_control` from system blocks; returns `undefined` for a missing system.
 */
function stripClaudeCacheControlFromSystem(system) {
    if (!system)
        return undefined;
    return system.map(stripClaudeCacheControlFromBlock);
}
/**
 * Strip `cache_control` from tool definitions; returns `undefined` for missing tools.
 */
function stripClaudeCacheControlFromTools(tools) {
    if (!tools)
        return undefined;
    return tools.map((tool) => {
        if ('cache_control' in tool) {
            const { cache_control: _cc, ...rest } = tool;
            return rest;
        }
        return tool;
    });
}
// ============================================================================
// Payload builder
// ============================================================================
/**
 * Build the Messages API payload and request options for a prompt.
 *
 * Steps: repair orphaned tool uses, sanitize messages, validate tool schemas,
 * convert tool blocks to text when no tools are supplied, strip any incoming
 * `cache_control`, then (when `model_options.cache_enabled === true`) mark the
 * last system block, the last tool and the last block of the second-to-last
 * message (only when there are at least four messages) with an ephemeral
 * cache marker. Markers are placed on copies so the caller's prompt, tools and
 * conversation objects are never modified.
 *
 * @param options - Reads `model`, `model_options` and `tools`.
 * @param prompt - Claude prompt with `messages` and optional `system`.
 * @returns `{ payload, requestOptions }`; `requestOptions` is only set for
 *   `claude-3-7-sonnet` models requesting more than 64000 max or thinking tokens.
 * @throws Error when a tool's `input_schema.type` is not `"object"`.
 */
function getClaudePayload(options, prompt) {
    const modelName = options.model;
    const model_options = options.model_options;
    let requestOptions;
    if (modelName.includes('claude-3-7-sonnet') &&
        ((model_options?.max_tokens ?? 0) > 64000 || (model_options?.thinking_budget_tokens ?? 0) > 64000)) {
        requestOptions = { headers: { 'anthropic-beta': 'output-128k-2025-02-19' } };
    }
    const fixedMessages = fixOrphanedToolUse(prompt.messages);
    let sanitizedMessages = sanitizeMessages(fixedMessages);
    if (options.tools) {
        for (const tool of options.tools) {
            // Optional chaining so a missing input_schema reports the same
            // descriptive error instead of an opaque TypeError.
            const schemaType = tool.input_schema?.type;
            if (schemaType !== 'object') {
                throw new Error(`Tool "${tool.name}" has invalid input_schema.type: expected "object", got "${schemaType}"`);
            }
        }
    }
    const hasTools = options.tools && options.tools.length > 0;
    if (!hasTools && claudeMessagesContainToolBlocks(sanitizedMessages)) {
        sanitizedMessages = convertClaudeToolBlocksToText(sanitizedMessages);
    }
    sanitizedMessages = stripClaudeCacheControlFromMessages(sanitizedMessages);
    const sanitizedSystem = stripClaudeCacheControlFromSystem(prompt.system);
    const sanitizedTools = hasTools ? stripClaudeCacheControlFromTools(options.tools) : undefined;
    const cacheEnabled = model_options?.cache_enabled === true;
    if (cacheEnabled) {
        const cacheTtl = model_options?.cache_ttl;
        const cacheControl = { type: 'ephemeral', ...(cacheTtl && { ttl: cacheTtl }) };
        // The strip helpers hand back the original object when it had no
        // cache_control, so each marker is attached to a shallow copy that
        // replaces the element in the freshly created array.
        if (sanitizedSystem && sanitizedSystem.length > 0) {
            const lastIndex = sanitizedSystem.length - 1;
            sanitizedSystem[lastIndex] = { ...sanitizedSystem[lastIndex], cache_control: cacheControl };
        }
        if (sanitizedTools && sanitizedTools.length > 0) {
            const lastIndex = sanitizedTools.length - 1;
            sanitizedTools[lastIndex] = { ...sanitizedTools[lastIndex], cache_control: cacheControl };
        }
        if (sanitizedMessages.length >= 4) {
            const pivotMsg = sanitizedMessages[sanitizedMessages.length - 2];
            if (Array.isArray(pivotMsg.content) && pivotMsg.content.length > 0) {
                const lastIndex = pivotMsg.content.length - 1;
                const lastBlock = pivotMsg.content[lastIndex];
                if (typeof lastBlock === 'object' && lastBlock !== null && 'type' in lastBlock &&
                    lastBlock.type !== 'thinking' && lastBlock.type !== 'redacted_thinking') {
                    pivotMsg.content[lastIndex] = { ...lastBlock, cache_control: cacheControl };
                }
            }
        }
    }
    const { thinking, outputConfig, hasSamplingRestriction } = (0, claude_thinking_js_1.resolveClaudeThinking)(modelName, model_options);
    const payload = {
        messages: sanitizedMessages,
        system: sanitizedSystem,
        tools: sanitizedTools,
        temperature: hasSamplingRestriction ? undefined : model_options?.temperature,
        model: modelName,
        max_tokens: claudeMaxTokens(options),
        // top_p is only sent when no temperature is set.
        top_p: hasSamplingRestriction ? undefined : (model_options?.temperature != null ? undefined : model_options?.top_p),
        top_k: hasSamplingRestriction ? undefined : model_options?.top_k,
        stop_sequences: model_options?.stop_sequence,
        thinking,
        stream: true,
        ...(outputConfig && { output_config: outputConfig }),
    };
    return { payload, requestOptions };
}
// ============================================================================
// Conversation post-processing
// ============================================================================
/**
 * Advance the conversation turn counter and apply the image, large-text and
 * heartbeat stripping helpers from `@llumiverse/core`.
 *
 * @param conversation - Conversation to finalize.
 * @param options - Reads `stripImagesAfterTurns` (default `Infinity`),
 *   `stripTextMaxTokens` and `stripHeartbeatsAfterTurns` (default `1`).
 * @returns The processed conversation.
 */
function finalizeClaudeConversation(conversation, options) {
    const advanced = (0, core_1.incrementConversationTurn)(conversation);
    const currentTurn = (0, core_1.getConversationMeta)(advanced).turnNumber;
    const stripOptions = {
        keepForTurns: options.stripImagesAfterTurns ?? Infinity,
        currentTurn,
        textMaxTokens: options.stripTextMaxTokens,
    };
    let processed = (0, core_1.stripBase64ImagesFromConversation)(advanced, stripOptions);
    processed = (0, core_1.truncateLargeTextInConversation)(processed, stripOptions);
    processed = (0, core_1.stripHeartbeatsFromConversation)(processed, {
        keepForTurns: options.stripHeartbeatsAfterTurns ?? 1,
        currentTurn,
    });
    return processed;
}
// ============================================================================
// Streaming conversation builder (called after stream completes)
// ============================================================================
/**
 * Rebuild the conversation after a streamed completion has finished.
 *
 * The prompt is appended to `options.conversation`, followed by an assistant
 * text message (concatenation of all `text` results, when non-empty) and an
 * assistant `tool_use` message (when tools were called). The result is then
 * turn-incremented and stripped.
 *
 * @param prompt - Claude prompt that was sent.
 * @param result - Completion results; entries with `type === 'text'` are joined.
 * @param toolUse - Tool calls (`{ id, tool_name, tool_input }`) or undefined.
 * @param options - Execution options; reads `conversation` and the strip settings.
 * @returns The processed conversation.
 */
function buildClaudeStreamingConversation(prompt, result, toolUse, options) {
    const completionResults = result;
    const text = completionResults
        .filter((r) => r.type === 'text')
        .map((r) => r.value)
        .join('');
    let conversation = updateClaudeConversation(options.conversation, prompt);
    if (text) {
        const assistantMsg = { role: 'assistant', content: text };
        conversation = updateClaudeConversation(conversation, { messages: [assistantMsg] });
    }
    if (toolUse && toolUse.length > 0) {
        const toolBlocks = toolUse.map((t) => ({
            type: 'tool_use',
            id: t.id,
            name: t.tool_name,
            input: t.tool_input ?? {},
        }));
        const assistantToolMsg = { role: 'assistant', content: toolBlocks };
        conversation = updateClaudeConversation(conversation, { messages: [assistantToolMsg] });
    }
    return finalizeClaudeConversation(conversation, options);
}
// ============================================================================
// Execution helpers (standalone, take a client parameter)
// ============================================================================
/**
 * Execute a non-streaming Claude completion.
 * Works with any Anthropic-compatible client (Anthropic or AnthropicVertex).
 *
 * The request is issued through `client.messages.stream(...).finalMessage()`
 * and the final message is converted into a completion object.
 *
 * @param client - Client exposing `messages.stream(payload, requestOptions)`.
 * @param prompt - Claude prompt to send.
 * @param options - Execution options; reads `model_options`, `conversation`
 *   and the strip settings.
 * @returns Promise of `{ result, tool_use, token_usage, finish_reason, conversation }`.
 */
async function executeClaudeCompletion(client, prompt, options) {
    const model_options = options.model_options;
    let conversation = updateClaudeConversation(options.conversation, prompt);
    const { payload, requestOptions } = getClaudePayload(options, conversation);
    const result = await client.messages.stream(payload, requestOptions).finalMessage();
    const includeThoughts = model_options?.include_thoughts ?? false;
    const text = collectAllTextContent(result.content, includeThoughts);
    const tool_use = collectClaudeTools(result.content);
    conversation = updateClaudeConversation(conversation, createPromptFromResponse(result));
    const processedConversation = finalizeClaudeConversation(conversation, options);
    return {
        // Always exactly one text entry, empty when the model produced no text.
        result: [{ type: 'text', value: text || '' }],
        tool_use,
        token_usage: anthropicUsageToTokenUsage(result.usage),
        finish_reason: tool_use ? 'tool_use' : claudeFinishReason(result?.stop_reason ?? ''),
        conversation: processedConversation,
    };
}
/**
 * Execute a streaming Claude completion.
 * Works with any Anthropic-compatible client (Anthropic or AnthropicVertex).
 *
 * Each SDK stream event is mapped to a completion chunk: usage on
 * `message_start` / `message_delta`, tool call start and partial JSON input,
 * text deltas, and (when `model_options.include_thoughts` is set) thinking
 * deltas and redacted thinking. Events with nothing to report yield
 * `{ result: [] }`.
 *
 * @param client - Client exposing `messages.stream(payload, requestOptions)`.
 * @param prompt - Claude prompt to send.
 * @param options - Execution options; reads `model_options` and `conversation`.
 * @returns Promise of the mapped async stream of completion chunks.
 */
async function streamClaudeCompletion(client, prompt, options) {
    const model_options = options.model_options;
    const conversation = updateClaudeConversation(options.conversation, prompt);
    const { payload, requestOptions } = getClaudePayload(options, conversation);
    const streamingPayload = { ...payload, stream: true };
    const response_stream = await client.messages.stream(streamingPayload, requestOptions);
    // Tool call currently being streamed, or null outside a tool_use block.
    let currentToolUse = null;
    // Set after a thinking signature so the next text delta starts on a new paragraph.
    let pendingSpacing = false;
    const stream = (0, async_1.asyncMap)(response_stream, async (streamEvent) => {
        switch (streamEvent.type) {
            case 'message_start':
                return {
                    result: [{ type: 'text', value: '' }],
                    token_usage: anthropicUsageToTokenUsage(streamEvent.message.usage),
                };
            case 'message_delta':
                return {
                    result: [{ type: 'text', value: '' }],
                    token_usage: { result: streamEvent.usage.output_tokens },
                    finish_reason: claudeFinishReason(streamEvent.delta.stop_reason ?? undefined),
                };
            case 'content_block_start':
                if (streamEvent.content_block.type === 'tool_use') {
                    currentToolUse = {
                        id: streamEvent.content_block.id,
                        name: streamEvent.content_block.name,
                        inputJson: '',
                    };
                    return {
                        result: [],
                        tool_use: [{
                                id: streamEvent.content_block.id,
                                tool_name: streamEvent.content_block.name,
                                tool_input: '',
                            }],
                    };
                }
                if (streamEvent.content_block.type === 'redacted_thinking' && model_options?.include_thoughts) {
                    return {
                        result: [{ type: 'text', value: `[Redacted thinking: ${streamEvent.content_block.data}]` }],
                    };
                }
                break;
            case 'content_block_delta':
                switch (streamEvent.delta.type) {
                    case 'text_delta': {
                        const prefix = pendingSpacing ? '\n\n' : '';
                        pendingSpacing = false;
                        return {
                            result: streamEvent.delta.text ? [{ type: 'text', value: prefix + streamEvent.delta.text }] : [],
                        };
                    }
                    case 'input_json_delta':
                        if (currentToolUse && streamEvent.delta.partial_json) {
                            // Partial input chunks carry the tool id with an empty name.
                            return {
                                result: [],
                                tool_use: [{
                                        id: currentToolUse.id,
                                        tool_name: '',
                                        tool_input: streamEvent.delta.partial_json,
                                    }],
                            };
                        }
                        break;
                    case 'thinking_delta':
                        if (model_options?.include_thoughts) {
                            return {
                                result: streamEvent.delta.thinking ? [{ type: 'text', value: streamEvent.delta.thinking }] : [],
                            };
                        }
                        break;
                    case 'signature_delta':
                        if (model_options?.include_thoughts) {
                            pendingSpacing = true;
                        }
                        break;
                }
                break;
            case 'content_block_stop':
                if (currentToolUse) {
                    currentToolUse = null;
                    pendingSpacing = false;
                }
                break;
        }
        return { result: [] };
    });
    return stream;
}
// ============================================================================
// Error handling
// ============================================================================
/**
 * Convert an Anthropic SDK error into a `LlumiverseError`.
 *
 * @param error - The caught error.
 * @param context - Error context; `context.provider` prefixes the message.
 * @returns A `LlumiverseError`. Non-API `AnthropicError`s are marked
 *   non-retryable; `APIError`s get status, error type and request ID folded
 *   into the message and a retryable flag from `isClaudeErrorRetryable`.
 * @throws The original error when it is not an Anthropic SDK error.
 */
function formatAnthropicLlumiverseError(error, context) {
    if (error instanceof error_1.AnthropicError && !(error instanceof error_1.APIError)) {
        // Client-side SDK error (e.g. "Streaming is required for operations that may take longer than 10 minutes").
        // These are structural/configuration errors — retrying will never succeed.
        const errorName = error.constructor?.name || 'AnthropicError';
        return new core_1.LlumiverseError(`[${context.provider}] ${error.message}`, false, context, error, undefined, errorName);
    }
    if (!(error instanceof error_1.APIError)) {
        // Not an Anthropic error — rethrow for default handling
        throw error;
    }
    const apiError = error;
    const httpStatusCode = apiError.status;
    let message = apiError.message || String(error);
    let errorType;
    // Prefer the nested `error.error.{type,message}` from the response body when present.
    if (apiError.error && typeof apiError.error === 'object') {
        const nested = apiError.error;
        if (nested['error'] && typeof nested['error'] === 'object') {
            const innerError = nested['error'];
            errorType = innerError['type'];
            if (typeof innerError['message'] === 'string') {
                message = innerError['message'];
            }
        }
    }
    let userMessage = message;
    if (httpStatusCode)
        userMessage = `[${httpStatusCode}] ${userMessage}`;
    if (errorType && errorType !== 'error')
        userMessage = `${errorType}: ${userMessage}`;
    if (apiError.requestID)
        userMessage += ` (Request ID: ${apiError.requestID})`;
    const retryable = isClaudeErrorRetryable(error, httpStatusCode, errorType, apiError.headers ?? undefined);
    const errorName = error.constructor?.name || 'AnthropicError';
    return new core_1.LlumiverseError(`[${context.provider}] ${userMessage}`, retryable, context, error, httpStatusCode, errorName);
}
/**
 * Decide whether a Claude API error is worth retrying.
 *
 * Order of precedence: the `x-should-retry` response header, the SDK error
 * class, the `invalid_request_error` type, the HTTP status code, and finally
 * non-timeout connection errors.
 *
 * @param error - The caught error.
 * @param httpStatusCode - HTTP status code, if known.
 * @param errorType - Error type string from the response body, if known.
 * @param headers - Response headers: either an object with a `get(name)`
 *   method or a plain object keyed by header name.
 * @returns `true`, `false`, or `undefined` when no rule applies.
 */
function isClaudeErrorRetryable(error, httpStatusCode, errorType, headers) {
    // Honour the server's explicit retry directive first (mirrors SDK shouldRetry logic).
    // Headers may arrive as a plain record instead of an object with get(),
    // so fall back to property lookup rather than throwing.
    let shouldRetryHeader;
    if (headers && typeof headers.get === 'function') {
        shouldRetryHeader = headers.get('x-should-retry');
    }
    else if (headers && typeof headers === 'object') {
        shouldRetryHeader = headers['x-should-retry'];
    }
    if (shouldRetryHeader === 'true')
        return true;
    if (shouldRetryHeader === 'false')
        return false;
    if (error instanceof error_1.APIUserAbortError)
        return false;
    if (error instanceof error_1.RateLimitError)
        return true;
    if (error instanceof error_1.InternalServerError)
        return true;
    if (error instanceof error_1.APIConnectionTimeoutError)
        return true;
    if (error instanceof error_1.BadRequestError)
        return false;
    if (error instanceof error_1.AuthenticationError)
        return false;
    if (error instanceof error_1.PermissionDeniedError)
        return false;
    if (error instanceof error_1.NotFoundError)
        return false;
    if (error instanceof error_1.ConflictError)
        return true; // SDK retries 409 (lock timeouts)
    if (error instanceof error_1.UnprocessableEntityError)
        return false;
    if (errorType === 'invalid_request_error')
        return false;
    if (httpStatusCode !== undefined) {
        if (httpStatusCode === 429 || httpStatusCode === 408 || httpStatusCode === 529)
            return true;
        if (httpStatusCode >= 500 && httpStatusCode < 600)
            return true;
        if (httpStatusCode >= 400 && httpStatusCode < 500)
            return false;
    }
    if (error instanceof error_1.APIConnectionError && !(error instanceof error_1.APIConnectionTimeoutError))
        return true;
    return undefined;
}
//# sourceMappingURL=claude-messages.js.map