UNPKG

@genkit-ai/vertexai

Version:

Genkit AI framework plugin for Google Cloud Vertex AI APIs including Gemini APIs, Imagen, and more.

582 lines (538 loc) 15.7 kB
/**
 * Copyright 2025 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import type {
  ContentBlock as AnthropicContent,
  ImageBlockParam,
  Message,
  MessageCreateParamsBase,
  MessageParam,
  TextBlock,
  TextBlockParam,
  TextDelta,
  ThinkingBlock,
  ThinkingBlockParam,
  Tool,
  ToolResultBlockParam,
  ToolUseBlock,
  ToolUseBlockParam,
} from '@anthropic-ai/sdk/resources/messages';
import { AnthropicVertex } from '@anthropic-ai/vertex-sdk';
import {
  ActionMetadata,
  z,
  type GenerateRequest,
  type Part as GenkitPart,
  type MessageData,
  type ModelReference,
  type ModelResponseData,
  type Part,
} from 'genkit';
import {
  GenerationCommonConfigSchema,
  ModelInfo,
  getBasicUsageStats,
  modelRef,
  type ModelAction,
} from 'genkit/model';
import { model as pluginModel } from 'genkit/plugin';
import { getGenkitClientHeader } from '../../common/index.js';
import { PluginOptions } from './types.js';
import { checkModelName } from './utils.js';

/**
 * Genkit-side configuration for Anthropic thinking.
 *
 * Cross-field rules enforced here:
 *  - `enabled` and `adaptive` cannot both be set to true;
 *  - `budgetTokens` is required when `enabled` is true;
 *  - `budgetTokens` must be an integer whenever it will be sent, i.e. when
 *    `enabled` is true, or when `enabled` is unset and `adaptive` is not true
 *    (the case in which `toAnthropicThinking` treats a bare budget as
 *    "enabled").
 */
export const ThinkingConfigSchema = z
  .object({
    enabled: z.boolean().optional(),
    budgetTokens: z.number().min(1_024).optional(),
    adaptive: z.boolean().optional(),
    display: z.enum(['summarized', 'omitted']).optional(),
  })
  .passthrough()
  .superRefine((value, ctx) => {
    if (value.enabled && value.adaptive) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        path: ['adaptive'],
        message:
          'Cannot use both enabled and adaptive thinking modes simultaneously',
      });
    }

    // A budget given without `enabled` still turns thinking on in
    // toAnthropicThinking (unless adaptive wins), so validate it there too.
    const budgetImpliesEnabled =
      value.enabled === undefined && value.adaptive !== true;

    if (value.enabled) {
      if (value.budgetTokens === undefined) {
        ctx.addIssue({
          code: z.ZodIssueCode.custom,
          path: ['budgetTokens'],
          message: 'budgetTokens is required when thinking is enabled',
        });
      } else if (!Number.isInteger(value.budgetTokens)) {
        ctx.addIssue({
          code: z.ZodIssueCode.custom,
          path: ['budgetTokens'],
          message: 'budgetTokens must be an integer',
        });
      }
    } else if (
      budgetImpliesEnabled &&
      value.budgetTokens !== undefined &&
      !Number.isInteger(value.budgetTokens)
    ) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        path: ['budgetTokens'],
        message: 'budgetTokens must be an integer',
      });
    }
  });

/**
 * Request configuration accepted by the Anthropic models. Unknown keys are
 * allowed (`passthrough`) and are forwarded to the Anthropic request by
 * `toAnthropicRequest`.
 */
export const AnthropicConfigSchema = GenerationCommonConfigSchema.extend({
  location: z.string().optional(),
  thinking: ThinkingConfigSchema.optional().describe(
    'The thinking configuration to use for the request. Thinking is a feature that allows the model to think about the request and provide a better response.'
  ),
  output_config: z
    .object({
      effort: z.enum(['low', 'medium', 'high', 'xhigh']).optional(),
    })
    .passthrough()
    .describe(
      'Configuration for output generation, such as setting the effort parameter.'
    )
    .optional(),
}).passthrough();

export type AnthropicConfigSchemaType = typeof AnthropicConfigSchema;
export type AnthropicConfig = z.infer<AnthropicConfigSchemaType>;

// All the config schema types
type ConfigSchemaType = AnthropicConfigSchemaType;

/**
 * Builds a model reference under the `vertex-model-garden/` namespace.
 *
 * @param name Model name without the namespace prefix.
 * @param info Optional model info; when omitted a default capability set
 *   (multiturn, media, tools, system role, text output) is used.
 * @param configSchema Config schema attached to the reference.
 */
function commonRef(
  name: string,
  info?: ModelInfo,
  configSchema: ConfigSchemaType = AnthropicConfigSchema
): ModelReference<ConfigSchemaType> {
  return modelRef({
    name: `vertex-model-garden/${name}`,
    configSchema,
    info: info ?? {
      supports: {
        multiturn: true,
        media: true,
        tools: true,
        systemRole: true,
        output: ['text'],
      },
    },
  });
}

/** Generic reference whose `info` is reused for models created by `model()`. */
export const GENERIC_MODEL = commonRef('anthropic');

/** References for the model versions this plugin knows by name. */
export const KNOWN_MODELS = {
  'claude-opus-4-7': commonRef('claude-opus-4-7'),
  'claude-sonnet-4-6': commonRef('claude-sonnet-4-6'),
  'claude-opus-4-6': commonRef('claude-opus-4-6'),
  'claude-haiku-4-5@20251001': commonRef('claude-haiku-4-5@20251001'),
  'claude-sonnet-4-5@20250929': commonRef('claude-sonnet-4-5@20250929'),
  'claude-sonnet-4@20250514': commonRef('claude-sonnet-4@20250514'),
  'claude-opus-4-5@20251101': commonRef('claude-opus-4-5@20251101'),
  'claude-opus-4-1@20250805': commonRef('claude-opus-4-1@20250805'),
  'claude-opus-4@20250514': commonRef('claude-opus-4@20250514'),
};
export type KnownModels = keyof typeof KNOWN_MODELS;
export type AnthropicModelName = `claude-${string}`;

/** Type guard: true when `value` is a string starting with `claude-`. */
export function isAnthropicModelName(
  value?: string
): value is AnthropicModelName {
  return !!value?.startsWith('claude-');
}

/**
 * Creates a model reference for an arbitrary Anthropic model version.
 *
 * @param version Model version; passed through `checkModelName`.
 * @param options Default config attached to the reference.
 */
export function model(
  version: string,
  options: AnthropicConfig = {}
): ModelReference<AnthropicConfigSchemaType> {
  const name = checkModelName(version);

  return modelRef({
    name: `vertex-model-garden/${name}`,
    config: options,
    configSchema: AnthropicConfigSchema,
    info: {
      ...GENERIC_MODEL.info,
    },
  });
}

export interface ClientOptions {
  location: string; // e.g. 'us-central1' or 'global'
  projectId: string;
}

/** Lists dynamically discoverable model actions; currently always empty. */
export function listActions(clientOptions: ClientOptions): ActionMetadata[] {
  // TODO: figure out where to get the list of models.
  return [];
}

/** Defines a model action for every entry in `KNOWN_MODELS`. */
export function listKnownModels(
  clientOptions: ClientOptions,
  pluginOptions?: PluginOptions
) {
  return Object.keys(KNOWN_MODELS).map((name) =>
    defineModel(name, clientOptions, pluginOptions)
  );
}

/**
 * Defines the Genkit model action for one Anthropic model.
 *
 * One `AnthropicVertex` client is created lazily per region and reused for
 * later requests to that region. The region is taken from
 * `request.config.location`, falling back to `clientOptions.location`.
 *
 * When streaming is requested, text deltas are forwarded as text chunks and
 * thinking deltas as reasoning chunks; every other delta kind is ignored
 * because it carries no displayable content of its own. The full response is
 * always built from the final message.
 */
export function defineModel(
  name: string,
  clientOptions: ClientOptions,
  pluginOptions?: PluginOptions
): ModelAction {
  const clients: Record<string, AnthropicVertex> = {};
  const clientFactory = (region: string): AnthropicVertex => {
    if (!clients[region]) {
      clients[region] = new AnthropicVertex({
        region: region,
        projectId: clientOptions.projectId,
        defaultHeaders: {
          'X-Goog-Api-Client': getGenkitClientHeader(),
        },
      });
    }
    return clients[region];
  };
  const ref = model(name);

  return pluginModel(
    {
      name: ref.name,
      ...ref.info,
      configSchema: ref.configSchema,
    },
    async (request, { streamingRequested, sendChunk }) => {
      const client = clientFactory(
        request.config?.location || clientOptions.location
      );
      const modelVersion = checkModelName(ref.name);
      const anthropicRequest = toAnthropicRequest(modelVersion, request);

      if (!streamingRequested) {
        // Non-streaming
        const response = await client.messages.create({
          ...anthropicRequest,
          stream: false,
        });
        return fromAnthropicResponse(request, response);
      }

      // Streaming
      const stream = await client.messages.stream(anthropicRequest);
      for await (const event of stream) {
        if (event.type !== 'content_block_delta') {
          continue;
        }
        const delta = event.delta;
        if (delta.type === 'text_delta') {
          const textDelta: TextDelta = delta;
          sendChunk({
            index: 0,
            content: [{ text: textDelta.text }],
          });
        } else if (delta.type === 'thinking_delta') {
          sendChunk({
            index: 0,
            content: [{ reasoning: delta.thinking }],
          });
        }
        // Other delta kinds (tool input JSON, signatures, ...) have no text
        // to surface; they are reflected in the final message instead.
      }
      return fromAnthropicResponse(request, await stream.finalMessage());
    }
  );
}

/**
 * Converts a Genkit generate request into Anthropic message-create params.
 *
 * - A `system` message becomes the top-level `system` string; its text parts
 *   are concatenated without a separator.
 * - A message containing any tool response part is sent with role `user`.
 * - Messages that convert to no content blocks are skipped.
 * - Config keys not explicitly mapped are forwarded unchanged.
 *
 * @param model Anthropic model identifier placed in the request.
 * @param input Genkit request to convert.
 * @throws Error if a system message contains a non-text (or empty-text) part,
 *   or if any part cannot be converted.
 */
export function toAnthropicRequest(
  model: string,
  input: GenerateRequest<typeof AnthropicConfigSchema>
): MessageCreateParamsBase {
  let system: string | undefined = undefined;
  const messages: MessageParam[] = [];

  for (const msg of input.messages) {
    if (msg.role === 'system') {
      system = msg.content
        .map((c) => {
          if (!c.text) {
            throw new Error(
              'Only text context is supported for system messages.'
            );
          }
          return c.text;
        })
        .join('');
      continue;
    }

    const content = toAnthropicContent(msg.content);
    if (content.length === 0) {
      // Nothing to send for this message (no parts, or only empty text).
      continue;
    }

    // Tool results must be delivered in a user message.
    // https://docs.anthropic.com/en/docs/build-with-claude/tool-use#handling-tool-use-and-tool-result-content-blocks
    const carriesToolResponse = msg.content.some((part) => part.toolResponse);
    messages.push({
      role: carriesToolResponse ? 'user' : toAnthropicRole(msg.role),
      content,
    });
  }

  // `location` and `version` are pulled out only so that they are not
  // forwarded to Anthropic via `restConfig`.
  const {
    location,
    version,
    maxOutputTokens,
    stopSequences,
    temperature,
    topK,
    topP,
    thinking,
    output_config,
    ...restConfig
  } = input.config ?? {};

  const request = {
    model,
    messages,
    // https://docs.anthropic.com/claude/docs/models-overview#model-comparison
    max_tokens: maxOutputTokens ?? 4096,
    ...restConfig,
  } as MessageCreateParamsBase & Record<string, any>;

  if (system) {
    request['system'] = system;
  }
  if (input.tools) {
    request.tools = input.tools.map((tool) => {
      return {
        name: tool.name,
        description: tool.description,
        input_schema: tool.inputSchema,
      };
    }) as Array<Tool>;
  }
  if (stopSequences) {
    request.stop_sequences = stopSequences;
  }
  if (temperature !== undefined) {
    request.temperature = temperature;
  }
  if (topK !== undefined) {
    request.top_k = topK;
  }
  if (topP !== undefined) {
    request.top_p = topP;
  }
  if (thinking) {
    const anthropicThinking = toAnthropicThinking(thinking);
    if (anthropicThinking) {
      request.thinking = anthropicThinking;
    }
  }
  if (output_config) {
    request.output_config = output_config;
  }
  return request;
}

/**
 * Maps the Genkit thinking config to the Anthropic `thinking` parameter.
 *
 * Precedence: `adaptive: true` wins; then `enabled: true` (budget required);
 * then `enabled: false`; finally a bare `budgetTokens` is treated as enabled.
 *
 * @returns The Anthropic thinking object, or `undefined` when nothing in the
 *   config selects a mode.
 * @throws Error if `enabled` is true and `budgetTokens` is missing.
 */
function toAnthropicThinking(
  config: z.infer<typeof ThinkingConfigSchema> | undefined
):
  | { type: 'enabled'; budget_tokens: number }
  | { type: 'disabled' }
  | { type: 'adaptive'; display?: 'summarized' | 'omitted' }
  | undefined {
  if (!config) return undefined;

  const { enabled, budgetTokens, adaptive, display } = config;

  if (adaptive === true) {
    return {
      type: 'adaptive',
      ...(display !== undefined && { display }),
    };
  }

  if (enabled === true) {
    if (budgetTokens === undefined) {
      throw new Error('budgetTokens is required when thinking is enabled');
    }
    return { type: 'enabled', budget_tokens: budgetTokens };
  }

  if (enabled === false) {
    return { type: 'disabled' };
  }

  if (budgetTokens !== undefined) {
    return { type: 'enabled', budget_tokens: budgetTokens };
  }

  return undefined;
}

/**
 * Converts Genkit parts into Anthropic content block params.
 *
 * - Reasoning parts become `thinking` blocks, carrying
 *   `metadata.thoughtSignature` as the signature when present. A reasoning
 *   part with empty text is still replayed if it has a signature
 *   (NOTE(review): presumably the shape produced when thinking display is
 *   omitted - confirm against the Anthropic docs); without a signature there
 *   is nothing to replay and it is dropped.
 * - Empty text parts are dropped. `fromAnthropicPart` produces such parts for
 *   unrecognised block types, so they can legitimately appear in history.
 * - Media `data:` URLs are stripped down to their payload; when the part has
 *   no `contentType`, the media type is read from the data URL header.
 *
 * @throws Error for a part that matches none of the supported kinds.
 */
function toAnthropicContent(
  content: GenkitPart[]
): Array<
  | TextBlockParam
  | ImageBlockParam
  | ToolUseBlockParam
  | ToolResultBlockParam
  | ThinkingBlockParam
> {
  const blocks: Array<
    | TextBlockParam
    | ImageBlockParam
    | ToolUseBlockParam
    | ToolResultBlockParam
    | ThinkingBlockParam
  > = [];

  for (const p of content) {
    if (typeof p.reasoning === 'string') {
      const signature = p.metadata?.thoughtSignature;
      if (p.reasoning === '' && !signature) {
        continue;
      }
      blocks.push({
        type: 'thinking',
        thinking: p.reasoning,
        ...(signature ? { signature } : {}),
      } as ThinkingBlockParam);
      continue;
    }

    if (typeof p.text === 'string') {
      if (p.text !== '') {
        blocks.push({
          type: 'text',
          text: p.text,
        });
      }
      continue;
    }

    if (p.media) {
      let b64Data = p.media.url;
      let mediaType = p.media.contentType;
      if (b64Data.startsWith('data:')) {
        const commaIndex = b64Data.indexOf(',');
        if (!mediaType && commaIndex > 0) {
          // Header looks like `data:<media type>[;base64]`.
          const header = b64Data.substring('data:'.length, commaIndex);
          mediaType = header.split(';')[0] || undefined;
        }
        b64Data = b64Data.substring(commaIndex + 1);
      }
      blocks.push({
        type: 'image',
        source: {
          type: 'base64',
          data: b64Data,
          media_type: mediaType as
            | 'image/jpeg'
            | 'image/png'
            | 'image/gif'
            | 'image/webp',
        },
      });
      continue;
    }

    if (p.toolRequest) {
      blocks.push(toAnthropicToolRequest(p.toolRequest));
      continue;
    }

    if (p.toolResponse) {
      blocks.push(toAnthropicToolResponse(p));
      continue;
    }

    throw new Error(`Unsupported content type: ${JSON.stringify(p)}`);
  }

  return blocks;
}

/**
 * Maps a Genkit message role to an Anthropic role.
 *
 * @throws Error for any role other than `model`, `user` or `tool`.
 */
function toAnthropicRole(role: MessageData['role']): 'user' | 'assistant' {
  if (role === 'model') {
    return 'assistant';
  }
  if (role === 'user') {
    return 'user';
  }
  if (role === 'tool') {
    return 'assistant';
  }
  throw new Error(`Unsupported role type ${role}`);
}

/** Converts an Anthropic text block to a Genkit text part. */
function fromAnthropicTextPart(part: TextBlock): Part {
  return { text: part.text };
}

/** Converts an Anthropic tool-use block to a Genkit tool request part. */
function fromAnthropicToolCallPart(part: ToolUseBlock): Part {
  return {
    toolRequest: {
      name: part.name,
      input: part.input,
      ref: part.id,
    },
  };
}

/**
 * Converts an Anthropic thinking block to a Genkit reasoning part, keeping
 * the signature (when defined) in `metadata.thoughtSignature` so that
 * `toAnthropicContent` can send it back on a later turn.
 */
function fromAnthropicThinkingPart(part: ThinkingBlock): Part {
  if (part.signature !== undefined) {
    return {
      reasoning: part.thinking,
      metadata: { thoughtSignature: part.signature },
    };
  }
  return { reasoning: part.thinking };
}

/**
 * Converts an Anthropic part to a Genkit part. Block types other than text,
 * tool_use and thinking are logged and replaced by an empty text part.
 */
function fromAnthropicPart(part: AnthropicContent): Part {
  if (part.type === 'text') return fromAnthropicTextPart(part);
  if (part.type === 'tool_use')
    return fromAnthropicToolCallPart(part as ToolUseBlock);
  if (part.type === 'thinking')
    return fromAnthropicThinkingPart(part as ThinkingBlock);

  const unknownType = (part as { type: string }).type;
  console.warn(
    `Unexpected Anthropic content block type: ${unknownType}. Returning empty text.`
  );
  return { text: '' };
}

/**
 * Converts an Anthropic response to a Genkit response.
 *
 * Usage combines Genkit's basic stats for the request/response messages with
 * the token counts reported by Anthropic; cache-related counters default to 0
 * when the response does not report them.
 */
export function fromAnthropicResponse(
  input: GenerateRequest<typeof AnthropicConfigSchema>,
  response: Message
): ModelResponseData {
  const parts = response.content as AnthropicContent[];
  const message: MessageData = {
    role: 'model',
    content: parts.map(fromAnthropicPart),
  };
  return {
    message,
    finishReason: toGenkitFinishReason(
      response.stop_reason as
        | 'end_turn'
        | 'max_tokens'
        | 'stop_sequence'
        | 'tool_use'
        | null
    ),
    custom: {
      id: response.id,
      model: response.model,
      type: response.type,
    },
    raw: response,
    usage: {
      ...getBasicUsageStats(input.messages, message),
      inputTokens: response.usage.input_tokens,
      outputTokens: response.usage.output_tokens,
      custom: {
        cache_creation_input_tokens:
          response.usage.cache_creation_input_tokens ?? 0,
        cache_read_input_tokens: response.usage.cache_read_input_tokens ?? 0,
        ephemeral_5m_input_tokens:
          response.usage.cache_creation?.ephemeral_5m_input_tokens ?? 0,
        ephemeral_1h_input_tokens:
          response.usage.cache_creation?.ephemeral_1h_input_tokens ?? 0,
      },
    },
  };
}

/**
 * Maps an Anthropic stop reason to a Genkit finish reason. `null` maps to
 * `unknown`; any value not listed maps to `other`.
 */
function toGenkitFinishReason(
  reason: 'end_turn' | 'max_tokens' | 'stop_sequence' | 'tool_use' | null
): ModelResponseData['finishReason'] {
  switch (reason) {
    case 'end_turn':
      return 'stop';
    case 'max_tokens':
      return 'length';
    case 'stop_sequence':
      return 'stop';
    case 'tool_use':
      return 'stop';
    case null:
      return 'unknown';
    default:
      return 'other';
  }
}

/**
 * Converts a Genkit tool request into an Anthropic `tool_use` block.
 *
 * @throws Error if the tool name is missing or does not match
 *   `^[a-zA-Z0-9_-]{1,64}$`.
 */
function toAnthropicToolRequest(tool: Record<string, any>): ToolUseBlockParam {
  if (!tool.name) {
    throw new Error('Tool name is required');
  }
  // Validate the tool name: only letters, numbers, underscores, and hyphens
  // are accepted, 1 to 64 characters.
  // https://docs.anthropic.com/en/docs/build-with-claude/tool-use#specifying-tools
  if (!/^[a-zA-Z0-9_-]{1,64}$/.test(tool.name)) {
    throw new Error(
      `Tool name ${tool.name} contains invalid characters. Only letters, numbers, underscores, and hyphens are allowed, and the name must be between 1 and 64 characters long.`
    );
  }
  const declaration: ToolUseBlockParam = {
    type: 'tool_use',
    id: tool.ref,
    name: tool.name,
    input: tool.input,
  };
  return declaration;
}

/**
 * Converts a Genkit tool response part into an Anthropic `tool_result` block
 * whose content is the JSON-serialised output.
 *
 * Falsy outputs such as `0`, `false`, `''` and `null` are valid results and
 * are serialised; only a missing (`undefined`) output is rejected, since it
 * has no JSON representation.
 *
 * @throws Error if the response has no `ref` or its output is `undefined`.
 */
function toAnthropicToolResponse(part: Part): ToolResultBlockParam {
  if (!part.toolResponse?.ref) {
    throw new Error('Tool response reference is required');
  }
  if (part.toolResponse.output === undefined) {
    throw new Error('Tool response output is required');
  }
  return {
    type: 'tool_result',
    tool_use_id: part.toolResponse.ref,
    content: JSON.stringify(part.toolResponse.output),
  };
}