@llumiverse/drivers
LLM driver implementations. Currently supported: openai, huggingface, bedrock, replicate.
import { getMaxTokensLimitVertexAi, ModelType, PromptRole, readStreamAsBase64, readStreamAsString } from "@llumiverse/core";
import { asyncMap } from "@llumiverse/core/async";
export const ANTHROPIC_REGIONS = {
us: "us-east5",
europe: "europe-west1",
global: "global",
};
export const NON_GLOBAL_ANTHROPIC_MODELS = [
"claude-3-5",
"claude-3",
];
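// Map an Anthropic stop reason onto the llumiverse finish_reason vocabulary.
// 'end_turn' becomes "stop" and 'max_tokens' becomes "length"; anything else
// (e.g. 'stop_sequence') is passed through unchanged.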
function claudeFinishReason(reason) {
if (!reason)
return undefined;
switch (reason) {
case 'end_turn': return "stop";
case 'max_tokens': return "length";
default: return reason; // e.g. 'stop_sequence' is passed through unchanged
}
}
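// Extract tool_use blocks from a Claude response content array, or return
// undefined when the model did not call any tools.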
export function collectTools(content) {
const out = [];
for (const block of content) {
if (block.type === "tool_use") {
out.push({
id: block.id,
tool_name: block.name,
tool_input: block.input,
});
}
}
return out.length > 0 ? out : undefined;
}
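// Join all text blocks of a Claude response into a single string. When
// includeThoughts is true, thinking and redacted_thinking blocks are emitted
// first, separated from the answer text by a blank line.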
function collectAllTextContent(content, includeThoughts = false) {
const textParts = [];
// First pass: collect thinking blocks
if (includeThoughts) {
for (const block of content) {
if (block.type === 'thinking' && block.thinking) {
textParts.push(block.thinking);
}
else if (block.type === 'redacted_thinking' && block.data) {
textParts.push(`[Redacted thinking: ${block.data}]`);
}
}
if (textParts.length > 0) {
textParts.push(''); // Create a new line after thinking blocks
}
}
// Second pass: collect text blocks
for (const block of content) {
if (block.type === 'text' && block.text) {
textParts.push(block.text);
}
}
return textParts.join('\n');
}
// Used to get a max_tokens value when one is not specified in the model options. Claude requires max_tokens to be set.
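// For example, assuming getMaxTokensLimitVertexAi() returns 64000 for the model
// and thinking_budget_tokens is 2048, this returns min(16000 + 2048, 64000) = 18048.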
function maxToken(option) {
const modelOptions = option.model_options;
if (modelOptions && typeof modelOptions.max_tokens === "number") {
return modelOptions.max_tokens;
}
else {
const thinking_budget = modelOptions?.thinking_budget_tokens ?? 0;
let maxSupportedTokens = getMaxTokensLimitVertexAi(option.model); // Default max tokens limit for the model
// Claude 3.7 Sonnet supports larger outputs; adjust the ceiling when the thinking budget stays below 48k
if (option.model.includes('claude-3-7-sonnet') && (modelOptions?.thinking_budget_tokens ?? 0) < 48000) {
maxSupportedTokens = 64000; // Claude 3.7 can go up to 128k with a beta header, but when no max tokens is specified, we default to 64k.
}
return Math.min(16000 + thinking_budget, maxSupportedTokens); // Default to 16k plus the thinking budget, capped at the model limit, to avoid taking up too much context window and quota.
}
}
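/**
* Convert a segment's attached files into Claude content blocks.
* Images (png, jpeg, gif, webp) become base64 image blocks; other image types
* throw. Unless restrictedTypes is set, PDFs become base64 document blocks and
* text/* files become plain-text document blocks. Any other type is skipped.
*/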
async function collectFileBlocks(segment, restrictedTypes = false) {
const contentBlocks = [];
for (const file of segment.files || []) {
if (file.mime_type?.startsWith("image/")) {
const allowedTypes = ["image/png", "image/jpeg", "image/gif", "image/webp"];
if (!allowedTypes.includes(file.mime_type)) {
throw new Error(`Unsupported image type: ${file.mime_type}`);
}
const mimeType = String(file.mime_type);
contentBlocks.push({
type: 'image',
source: {
type: 'base64',
data: await readStreamAsBase64(await file.getStream()),
media_type: mimeType
}
});
}
else if (!restrictedTypes) {
if (file.mime_type === "application/pdf") {
contentBlocks.push({
title: file.name,
type: 'document',
source: {
type: 'base64',
data: await readStreamAsBase64(await file.getStream()),
media_type: 'application/pdf'
}
});
}
else if (file.mime_type?.startsWith("text/")) {
contentBlocks.push({
title: file.name,
type: 'document',
source: {
type: 'text',
data: await readStreamAsString(await file.getStream()),
media_type: 'text/plain'
}
});
}
}
}
return contentBlocks;
}
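/**
* Model definition for Anthropic Claude models served through Vertex AI.
* Builds Claude-format prompts and handles both blocking and streaming
* text completions.
*/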
export class ClaudeModelDefinition {
model;
constructor(modelId) {
this.model = {
id: modelId,
name: modelId,
provider: 'vertexai',
type: ModelType.Text,
can_stream: true,
};
}
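// Build a Claude prompt from llumiverse segments: system segments feed the
// system array (plus an optional JSON schema instruction), tool segments
// become tool_result user messages, and safety segments are appended after
// the regular messages.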
async createPrompt(_driver, segments, options) {
// Convert the prompt to the format expected by the Claude API
let system = segments
.filter(segment => segment.role === PromptRole.system)
.map(segment => ({
text: segment.content,
type: 'text'
}));
if (options.result_schema) {
let schemaText = '';
if (options.tools && options.tools.length > 0) {
schemaText = "When not calling tools, the answer must be a JSON object using the following JSON Schema:\n" + JSON.stringify(options.result_schema);
}
else {
schemaText = "The answer must be a JSON object using the following JSON Schema:\n" + JSON.stringify(options.result_schema);
}
const schemaSegments = {
text: schemaText,
type: 'text'
};
system.push(schemaSegments);
}
let messages = [];
const safetyMessages = [];
for (const segment of segments) {
if (segment.role === PromptRole.system) {
continue;
}
if (segment.role === PromptRole.tool) {
if (!segment.tool_use_id) {
throw new Error("Tool prompt segment must have a tool use ID");
}
// Build content blocks for tool results (restricted types)
const contentBlocks = [];
if (segment.content) {
contentBlocks.push({
type: 'text',
text: segment.content
});
}
// Collect file blocks with type safety
const fileBlocks = await collectFileBlocks(segment, true);
contentBlocks.push(...fileBlocks);
messages.push({
role: 'user',
content: [{
type: 'tool_result',
tool_use_id: segment.tool_use_id,
content: contentBlocks,
}]
});
}
else {
// Build content blocks for regular messages (all types allowed)
const contentBlocks = [];
if (segment.content) {
contentBlocks.push({
type: 'text',
text: segment.content
});
}
// Collect file blocks without restrictions
const fileBlocks = await collectFileBlocks(segment, false);
contentBlocks.push(...fileBlocks);
if (contentBlocks.length === 0) {
continue; // skip empty segments
}
const messageParam = {
role: segment.role === PromptRole.assistant ? 'assistant' : 'user',
content: contentBlocks
};
if (segment.role === PromptRole.safety) {
safetyMessages.push(messageParam);
}
else {
messages.push(messageParam);
}
}
}
messages = messages.concat(safetyMessages);
if (system && system.length === 0) {
system = undefined; // If system is empty, set to undefined
}
return {
messages: messages,
system: system
};
}
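// Blocking completion. A model id of the form "locations/<region>/.../<model>"
// pins the Anthropic client to that Vertex AI region; otherwise region is
// left undefined for the driver to resolve.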
async requestTextCompletion(driver, prompt, options) {
const splits = options.model.split("/");
let region = undefined;
if (splits[0] === "locations" && splits.length >= 2) {
region = splits[1];
}
const modelName = splits[splits.length - 1];
options = { ...options, model: modelName };
const client = await driver.getAnthropicClient(region);
if (options.model_options?._option_id !== "vertexai-claude") {
driver.logger.warn({ options: options.model_options }, "Invalid model options");
}
let conversation = updateConversation(options.conversation, prompt);
const { payload, requestOptions } = getClaudePayload(options, conversation);
// Disable streaming; the create() function is overloaded, so the payload type determines the return type.
const nonStreamingPayload = { ...payload, stream: false };
const result = await client.messages.create(nonStreamingPayload, requestOptions);
// Collect the text content, including thinking blocks if enabled
const includeThoughts = options.model_options?.include_thoughts ?? false;
const text = collectAllTextContent(result.content, includeThoughts);
const tool_use = collectTools(result.content);
conversation = updateConversation(conversation, createPromptFromResponse(result));
return {
result: text ? [{ type: "text", value: text }] : [{ type: "text", value: '' }],
tool_use,
token_usage: {
prompt: result.usage.input_tokens,
result: result.usage.output_tokens,
total: result.usage.input_tokens + result.usage.output_tokens
},
// Make sure finish_reason is set to the correct value (Claude normally sets stop_reason itself)
finish_reason: tool_use ? "tool_use" : claudeFinishReason(result?.stop_reason ?? ''),
conversation
};
}
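// Streaming completion. Anthropic stream events are mapped to llumiverse
// chunks: message_start and message_delta carry token usage, content_block
// events carry text and (optionally) thinking output, and all other events
// yield empty results.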
async requestTextCompletionStream(driver, prompt, options) {
const splits = options.model.split("/");
let region = undefined;
if (splits[0] === "locations" && splits.length >= 2) {
region = splits[1];
}
const modelName = splits[splits.length - 1];
options = { ...options, model: modelName };
const client = await driver.getAnthropicClient(region);
const model_options = options.model_options;
if (model_options?._option_id !== "vertexai-claude") {
driver.logger.warn({ options: options.model_options }, "Invalid model options");
}
const { payload, requestOptions } = getClaudePayload(options, prompt);
const streamingPayload = { ...payload, stream: true };
const response_stream = await client.messages.stream(streamingPayload, requestOptions);
const stream = asyncMap(response_stream, async (streamEvent) => {
switch (streamEvent.type) {
case "message_start":
return {
result: [{ type: "text", value: '' }],
token_usage: {
prompt: streamEvent.message.usage.input_tokens,
result: streamEvent.message.usage.output_tokens
}
};
case "message_delta":
return {
result: [{ type: "text", value: '' }],
token_usage: {
result: streamEvent.usage.output_tokens
},
finish_reason: claudeFinishReason(streamEvent.delta.stop_reason ?? undefined),
};
case "content_block_start":
// Handle redacted thinking blocks
if (streamEvent.content_block.type === "redacted_thinking" && model_options?.include_thoughts) {
return {
result: [{ type: "text", value: `[Redacted thinking: ${streamEvent.content_block.data}]` }]
};
}
break;
case "content_block_delta":
// Handle different delta types
switch (streamEvent.delta.type) {
case "text_delta":
return {
result: streamEvent.delta.text ? [{ type: "text", value: streamEvent.delta.text }] : []
};
case "thinking_delta":
if (model_options?.include_thoughts) {
return {
result: streamEvent.delta.thinking ? [{ type: "text", value: streamEvent.delta.thinking }] : [],
};
}
break;
case "signature_delta":
// Signature deltas signify the end of the thinking block.
if (model_options?.include_thoughts) {
return {
result: [{ type: "text", value: '\n\n' }], // Double newline for more spacing
};
}
break;
}
break;
case "content_block_stop":
// At the end of a content block, add spacing when thoughts are included
if (model_options?.include_thoughts) {
return {
result: [{ type: "text", value: '\n\n' }] // Add double newline for spacing
};
}
break;
}
// Default case for all other event types
return {
result: []
};
});
return stream;
}
}
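// Usage sketch (hypothetical wiring; `driver` is assumed to expose
// getAnthropicClient() and logger as used above, and the model id is only
// an example):
//   const model = new ClaudeModelDefinition("claude-3-5-sonnet-v2@20241022");
//   const prompt = await model.createPrompt(driver, segments, options);
//   const completion = await model.requestTextCompletion(driver, prompt, options);

// Wrap a Claude response in a prompt fragment so it can be appended to the
// conversation for subsequent turns.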
function createPromptFromResponse(response) {
return {
messages: [{
role: response.role,
content: response.content,
}],
system: undefined
};
}
/**
* Append a prompt's messages and system segments to an existing conversation.
* @param conversation the conversation to update, or undefined to start a new one
* @param prompt the prompt whose messages and system segments are appended
* @returns the updated conversation
*/
function updateConversation(conversation, prompt) {
const baseSystemMessages = conversation?.system || [];
const baseMessages = conversation?.messages || [];
const system = baseSystemMessages.concat(prompt.system || []);
return {
messages: baseMessages.concat(prompt.messages || []),
system: system.length > 0 ? system : undefined // If system is empty, set to undefined
};
}
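/**
* Build the Anthropic messages.create() payload from the resolved options and
* conversation. Also returns request options carrying the
* 'output-128k-2025-02-19' beta header when a Claude 3.7 Sonnet request asks
* for more than 64k output or thinking tokens.
*/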
function getClaudePayload(options, prompt) {
const modelName = options.model; // Model name is already extracted in the calling methods
const model_options = options.model_options;
// Add beta header for Claude 3.7 models to enable 128k output tokens
let requestOptions = undefined;
if (modelName.includes('claude-3-7-sonnet') &&
((model_options?.max_tokens ?? 0) > 64000 || (model_options?.thinking_budget_tokens ?? 0) > 64000)) {
requestOptions = {
headers: {
'anthropic-beta': 'output-128k-2025-02-19'
}
};
}
const payload = {
messages: prompt.messages,
system: prompt.system,
tools: options.tools, // llumiverse tools use the same shape as the Claude API, so they pass through unchanged
temperature: model_options?.temperature,
model: modelName,
max_tokens: maxToken(options),
top_p: model_options?.top_p,
top_k: model_options?.top_k,
stop_sequences: model_options?.stop_sequence,
thinking: model_options?.thinking_mode ?
{
budget_tokens: model_options?.thinking_budget_tokens ?? 1024,
type: "enabled"
} : {
type: "disabled"
}
};
return { payload, requestOptions };
}
//# sourceMappingURL=claude.js.map