
@llumiverse/drivers


LLM driver implementations. Currently supported providers: openai, huggingface, bedrock, replicate.

import { getMaxTokensLimitVertexAi, ModelType, PromptRole, readStreamAsBase64, readStreamAsString } from "@llumiverse/core";
import { asyncMap } from "@llumiverse/core/async";

export const ANTHROPIC_REGIONS = {
    us: "us-east5",
    europe: "europe-west1",
    global: "global",
};

export const NON_GLOBAL_ANTHROPIC_MODELS = [
    "claude-3-5",
    "claude-3",
];

// Map Claude stop reasons to the normalized llumiverse finish reasons.
function claudeFinishReason(reason) {
    if (!reason)
        return undefined;
    switch (reason) {
        case 'end_turn': return "stop";
        case 'max_tokens': return "length";
        default: return reason; // e.g. stop_sequence
    }
}

// Extract tool_use blocks from the response content, if any.
export function collectTools(content) {
    const out = [];
    for (const block of content) {
        if (block.type === "tool_use") {
            out.push({
                id: block.id,
                tool_name: block.name,
                tool_input: block.input,
            });
        }
    }
    return out.length > 0 ? out : undefined;
}

// Join all text content from a response into a single string, optionally
// prefixed by the model's thinking blocks.
function collectAllTextContent(content, includeThoughts = false) {
    const textParts = [];
    // First pass: collect thinking blocks
    if (includeThoughts) {
        for (const block of content) {
            if (block.type === 'thinking' && block.thinking) {
                textParts.push(block.thinking);
            }
            else if (block.type === 'redacted_thinking' && block.data) {
                textParts.push(`[Redacted thinking: ${block.data}]`);
            }
        }
        if (textParts.length > 0) {
            textParts.push(''); // Create a new line after thinking blocks
        }
    }
    // Second pass: collect text blocks
    for (const block of content) {
        if (block.type === 'text' && block.text) {
            textParts.push(block.text);
        }
    }
    return textParts.join('\n');
}

// Used to get a max_tokens value when it is not specified in the model options.
// Claude requires max_tokens to be set.
function maxToken(option) {
    const modelOptions = option.model_options;
    if (modelOptions && typeof modelOptions.max_tokens === "number") {
        return modelOptions.max_tokens;
    }
    else {
        const thinking_budget = modelOptions?.thinking_budget_tokens ?? 0;
        // Fallback to the default max tokens limit for the model
        let maxSupportedTokens = getMaxTokensLimitVertexAi(option.model);
        if (option.model.includes('claude-3-7-sonnet') && (modelOptions?.thinking_budget_tokens ?? 0) < 48000) {
            // Claude 3.7 can go up to 128k with a beta header, but when no max tokens is specified, we default to 64k.
            maxSupportedTokens = 64000;
        }
        // Cap to 16k plus the thinking budget, to avoid taking up too much context window and quota.
        return Math.min(16000 + thinking_budget, maxSupportedTokens);
    }
}
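// Worked example of the fallback above, with illustrative numbers: for a
// "claude-3-7-sonnet" model with thinking_budget_tokens = 32000 and no
// max_tokens set, the budget is below 48000, so maxSupportedTokens becomes
// 64000 and the result is Math.min(16000 + 32000, 64000) = 48000 output tokens.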
// Convert a prompt segment's attached files into Claude content blocks.
// When restrictedTypes is true (tool results), only images are allowed.
async function collectFileBlocks(segment, restrictedTypes = false) {
    const contentBlocks = [];
    for (const file of segment.files || []) {
        if (file.mime_type?.startsWith("image/")) {
            const allowedTypes = ["image/png", "image/jpeg", "image/gif", "image/webp"];
            if (!allowedTypes.includes(file.mime_type)) {
                throw new Error(`Unsupported image type: ${file.mime_type}`);
            }
            const mimeType = String(file.mime_type);
            contentBlocks.push({
                type: 'image',
                source: {
                    type: 'base64',
                    data: await readStreamAsBase64(await file.getStream()),
                    media_type: mimeType
                }
            });
        }
        else if (!restrictedTypes) {
            if (file.mime_type === "application/pdf") {
                contentBlocks.push({
                    title: file.name,
                    type: 'document',
                    source: {
                        type: 'base64',
                        data: await readStreamAsBase64(await file.getStream()),
                        media_type: 'application/pdf'
                    }
                });
            }
            else if (file.mime_type?.startsWith("text/")) {
                contentBlocks.push({
                    title: file.name,
                    type: 'document',
                    source: {
                        type: 'text',
                        data: await readStreamAsString(await file.getStream()),
                        media_type: 'text/plain'
                    }
                });
            }
        }
    }
    return contentBlocks;
}

export class ClaudeModelDefinition {
    model;

    constructor(modelId) {
        this.model = {
            id: modelId,
            name: modelId,
            provider: 'vertexai',
            type: ModelType.Text,
            can_stream: true,
        };
    }

    async createPrompt(_driver, segments, options) {
        // Convert the prompt to the format expected by the Claude API
        let system = segments
            .filter(segment => segment.role === PromptRole.system)
            .map(segment => ({ text: segment.content, type: 'text' }));
        if (options.result_schema) {
            let schemaText = '';
            if (options.tools && options.tools.length > 0) {
                schemaText = "When not calling tools, the answer must be a JSON object using the following JSON Schema:\n" + JSON.stringify(options.result_schema);
            }
            else {
                schemaText = "The answer must be a JSON object using the following JSON Schema:\n" + JSON.stringify(options.result_schema);
            }
            const schemaSegments = { text: schemaText, type: 'text' };
            system.push(schemaSegments);
        }
        let messages = [];
        const safetyMessages = [];
        for (const segment of segments) {
            if (segment.role === PromptRole.system) {
                continue;
            }
            if (segment.role === PromptRole.tool) {
                if (!segment.tool_use_id) {
                    throw new Error("Tool prompt segment must have a tool use ID");
                }
                // Build content blocks for tool results (restricted types)
                const contentBlocks = [];
                if (segment.content) {
                    contentBlocks.push({ type: 'text', text: segment.content });
                }
                // Collect file blocks with type safety
                const fileBlocks = await collectFileBlocks(segment, true);
                contentBlocks.push(...fileBlocks);
                messages.push({
                    role: 'user',
                    content: [{
                            type: 'tool_result',
                            tool_use_id: segment.tool_use_id,
                            content: contentBlocks,
                        }]
                });
            }
            else {
                // Build content blocks for regular messages (all types allowed)
                const contentBlocks = [];
                if (segment.content) {
                    contentBlocks.push({ type: 'text', text: segment.content });
                }
                // Collect file blocks without restrictions
                const fileBlocks = await collectFileBlocks(segment, false);
                contentBlocks.push(...fileBlocks);
                if (contentBlocks.length === 0) {
                    continue; // skip empty segments
                }
                const messageParam = {
                    role: segment.role === PromptRole.assistant ? 'assistant' : 'user',
                    content: contentBlocks
                };
                if (segment.role === PromptRole.safety) {
                    safetyMessages.push(messageParam);
                }
                else {
                    messages.push(messageParam);
                }
            }
        }
        messages = messages.concat(safetyMessages);
        if (system && system.length === 0) {
            system = undefined; // If system is empty, set to undefined
        }
        return { messages: messages, system: system };
    }
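    // For orientation, a sketch of what createPrompt() yields for a system
    // segment "You are helpful" and a user segment "Hi" (abbreviated):
    //   {
    //     system: [{ text: 'You are helpful', type: 'text' }],
    //     messages: [{ role: 'user', content: [{ type: 'text', text: 'Hi' }] }]
    //   }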
    async requestTextCompletion(driver, prompt, options) {
        // The model id may carry a "locations/<region>/" prefix
        const splits = options.model.split("/");
        let region = undefined;
        if (splits[0] === "locations" && splits.length >= 2) {
            region = splits[1];
        }
        const modelName = splits[splits.length - 1];
        options = { ...options, model: modelName };
        const client = await driver.getAnthropicClient(region);
        if (options.model_options?._option_id !== "vertexai-claude") {
            driver.logger.warn({ options: options.model_options }, "Invalid model options");
        }
        let conversation = updateConversation(options.conversation, prompt);
        const { payload, requestOptions } = getClaudePayload(options, conversation);
        // Disable streaming; the create function is overloaded, so the payload type matters.
        const nonStreamingPayload = { ...payload, stream: false };
        const result = await client.messages.create(nonStreamingPayload, requestOptions);
        // Collect the text content, including thinking blocks if enabled
        const includeThoughts = options.model_options?.include_thoughts ?? false;
        const text = collectAllTextContent(result.content, includeThoughts);
        const tool_use = collectTools(result.content);
        conversation = updateConversation(conversation, createPromptFromResponse(result));
        return {
            result: text ? [{ type: "text", value: text }] : [{ type: "text", value: '' }],
            tool_use,
            token_usage: {
                prompt: result.usage.input_tokens,
                result: result.usage.output_tokens,
                total: result.usage.input_tokens + result.usage.output_tokens
            },
            // Make sure we set finish_reason to the correct value (Claude normally sets this by itself)
            finish_reason: tool_use ? "tool_use" : claudeFinishReason(result?.stop_reason ?? ''),
            conversation
        };
    }
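    // Example of the shape returned by requestTextCompletion() above
    // (values are illustrative, not real output):
    //   {
    //     result: [{ type: "text", value: "Bonjour !" }],
    //     tool_use: undefined,
    //     token_usage: { prompt: 42, result: 7, total: 49 },
    //     finish_reason: "stop",
    //     conversation: { messages: [...], system: [...] }
    //   }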
    async requestTextCompletionStream(driver, prompt, options) {
        const splits = options.model.split("/");
        let region = undefined;
        if (splits[0] === "locations" && splits.length >= 2) {
            region = splits[1];
        }
        const modelName = splits[splits.length - 1];
        options = { ...options, model: modelName };
        const client = await driver.getAnthropicClient(region);
        const model_options = options.model_options;
        if (model_options?._option_id !== "vertexai-claude") {
            driver.logger.warn({ options: options.model_options }, "Invalid model options");
        }
        const { payload, requestOptions } = getClaudePayload(options, prompt);
        const streamingPayload = { ...payload, stream: true };
        const response_stream = await client.messages.stream(streamingPayload, requestOptions);
        const stream = asyncMap(response_stream, async (streamEvent) => {
            switch (streamEvent.type) {
                case "message_start":
                    return {
                        result: [{ type: "text", value: '' }],
                        token_usage: {
                            prompt: streamEvent.message.usage.input_tokens,
                            result: streamEvent.message.usage.output_tokens
                        }
                    };
                case "message_delta":
                    return {
                        result: [{ type: "text", value: '' }],
                        token_usage: { result: streamEvent.usage.output_tokens },
                        finish_reason: claudeFinishReason(streamEvent.delta.stop_reason ?? undefined),
                    };
                case "content_block_start":
                    // Handle redacted thinking blocks
                    if (streamEvent.content_block.type === "redacted_thinking" && model_options?.include_thoughts) {
                        return {
                            result: [{ type: "text", value: `[Redacted thinking: ${streamEvent.content_block.data}]` }]
                        };
                    }
                    break;
                case "content_block_delta":
                    // Handle different delta types
                    switch (streamEvent.delta.type) {
                        case "text_delta":
                            return {
                                result: streamEvent.delta.text ? [{ type: "text", value: streamEvent.delta.text }] : []
                            };
                        case "thinking_delta":
                            if (model_options?.include_thoughts) {
                                return {
                                    result: streamEvent.delta.thinking ? [{ type: "text", value: streamEvent.delta.thinking }] : [],
                                };
                            }
                            break;
                        case "signature_delta":
                            // Signature deltas signify the end of the thoughts.
                            if (model_options?.include_thoughts) {
                                return {
                                    result: [{ type: "text", value: '\n\n' }], // Double newline for more spacing
                                };
                            }
                            break;
                    }
                    break;
                case "content_block_stop":
                    // Handle the end of content blocks, e.g. for redacted thinking blocks
                    if (model_options?.include_thoughts) {
                        return {
                            result: [{ type: "text", value: '\n\n' }] // Add double newline for spacing
                        };
                    }
                    break;
            }
            // Default case for all other event types
            return { result: [] };
        });
        return stream;
    }
}

// Wrap a Claude response as prompt messages so it can be appended to the conversation.
function createPromptFromResponse(response) {
    return {
        messages: [{
                role: response.role,
                content: response.content,
            }],
        system: undefined
    };
}

/**
 * Update the conversation messages.
 * @param conversation the conversation to extend (may be undefined)
 * @param prompt the prompt or response messages to append
 * @returns the updated conversation
 */
function updateConversation(conversation, prompt) {
    const baseSystemMessages = conversation?.system || [];
    const baseMessages = conversation?.messages || [];
    const system = baseSystemMessages.concat(prompt.system || []);
    return {
        messages: baseMessages.concat(prompt.messages || []),
        system: system.length > 0 ? system : undefined // If system is empty, set to undefined
    };
}

function getClaudePayload(options, prompt) {
    const modelName = options.model; // Model name is already extracted in the calling methods
    const model_options = options.model_options;
    // Add beta header for Claude 3.7 models to enable 128k output tokens
    let requestOptions = undefined;
    if (modelName.includes('claude-3-7-sonnet') &&
        ((model_options?.max_tokens ?? 0) > 64000 || (model_options?.thinking_budget_tokens ?? 0) > 64000)) {
        requestOptions = { headers: { 'anthropic-beta': 'output-128k-2025-02-19' } };
    }
    const payload = {
        messages: prompt.messages,
        system: prompt.system,
        tools: options.tools, // we are using the same shape as claude for tools
        temperature: model_options?.temperature,
        model: modelName,
        max_tokens: maxToken(options),
        top_p: model_options?.top_p,
        top_k: model_options?.top_k,
        stop_sequences: model_options?.stop_sequence,
        thinking: model_options?.thinking_mode ? {
            budget_tokens: model_options?.thinking_budget_tokens ?? 1024,
            type: "enabled"
        } : { type: "disabled" }
    };
    return { payload, requestOptions };
}
//# sourceMappingURL=claude.js.map
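A minimal usage sketch follows, for orientation only. It assumes the package exports ClaudeModelDefinition from its root and that a driver object exposing getAnthropicClient(region) and a logger is available, which is all the methods above rely on; the import paths, driver factory, and model id are illustrative assumptions, not part of this file.

import { PromptRole } from "@llumiverse/core";
import { ClaudeModelDefinition } from "@llumiverse/drivers"; // assumed export path

// Hypothetical stand-in for the package's VertexAI driver object.
const driver = createVertexAIDriver(); // illustrative factory, not part of this file

// Region-prefixed model id, parsed by requestTextCompletion() above.
const def = new ClaudeModelDefinition("locations/us-east5/claude-3-5-sonnet-v2@20241022");

const options = {
    model: def.model.id,
    model_options: { _option_id: "vertexai-claude", max_tokens: 1024, temperature: 0.2 },
};
const segments = [
    { role: PromptRole.system, content: "You are a concise assistant." },
    { role: PromptRole.user, content: "Say hello in French." },
];

const prompt = await def.createPrompt(driver, segments, options);
const completion = await def.requestTextCompletion(driver, prompt, options);
console.log(completion.result[0].value, completion.finish_reason, completion.token_usage);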