UNPKG

@ai-sdk/google

Version:

The **[Google Generative AI provider](https://ai-sdk.dev/providers/ai-sdk-providers/google-generative-ai)** for the [AI SDK](https://ai-sdk.dev/docs) contains language model support for the [Google Generative AI](https://ai.google/discover/generativeai/)

264 lines (252 loc) 8.57 kB
import type { JSONValue, LanguageModelV3Content } from '@ai-sdk/provider'; import type { GoogleInteractionsStep } from './google-interactions-api'; import { annotationsToSources, builtinToolResultToSources, } from './extract-google-interactions-sources'; import type { GoogleInteractionsAnnotation, GoogleInteractionsBuiltinToolResultContent, } from './google-interactions-prompt'; export type ParseGoogleInteractionsOutputsResult = { content: Array<LanguageModelV3Content>; hasFunctionCall: boolean; }; /* * Builds a `providerMetadata.google` payload for an output part so the * Interactions converter on the next turn can read both the per-step * `signature` (round-trip) and the parent `interactionId` (history compaction * under `previousInteractionId`). */ function googleProviderMetadata({ signature, interactionId, }: { signature?: string | null; interactionId?: string; }): { providerMetadata: { google: Record<string, string> } } | object { const google: Record<string, string> = {}; if (signature != null) { google.signature = signature; } if (interactionId != null) { google.interactionId = interactionId; } return Object.keys(google).length > 0 ? { providerMetadata: { google } } : {}; } const BUILTIN_TOOL_CALL_TYPES = new Set([ 'google_search_call', 'code_execution_call', 'url_context_call', 'file_search_call', 'google_maps_call', 'mcp_server_tool_call', ]); const BUILTIN_TOOL_RESULT_TYPES = new Set([ 'google_search_result', 'code_execution_result', 'url_context_result', 'file_search_result', 'google_maps_result', 'mcp_server_tool_result', ]); function builtinToolNameFromCallType(type: string): string { return type.replace(/_call$/, ''); } function builtinToolNameFromResultType(type: string): string { return type.replace(/_result$/, ''); } /** * Walks the `steps[]` array of an Interactions response and emits AI SDK * `LanguageModelV3Content[]`. Surfaces: * * - `model_output` steps: iterates `step.content[]` for `text` (with * annotations → source parts) and `image` content blocks. * - `thought` steps: emits a single `reasoning` part from `summary[*]`. * - `function_call` steps: emits a `tool-call` part directly. * - Built-in tool `*_call` / `*_result` steps (Google Search, Code Execution, * URL Context, File Search, Google Maps, MCP Server): emits * `tool-call`/`tool-result` parts with `providerExecuted: true`. * - `user_input` steps are skipped (they echo the client's input). */ export function parseGoogleInteractionsOutputs({ steps, generateId, interactionId, }: { steps: Array<GoogleInteractionsStep> | null | undefined; generateId: () => string; /** * Top-level `Interaction.id` on the response. Stamped onto each output * part's `providerMetadata.google.interactionId` so the converter can drop * matching assistant turns when `previousInteractionId` is used on the * next turn (compaction). */ interactionId?: string; }): ParseGoogleInteractionsOutputsResult { const content: Array<LanguageModelV3Content> = []; let hasFunctionCall = false; if (steps == null) { return { content, hasFunctionCall }; } for (const step of steps) { if (step == null || typeof step !== 'object') continue; const type = (step as { type?: string }).type; if (typeof type !== 'string') continue; switch (type) { case 'user_input': { break; } case 'model_output': { const blocks = (step as { content?: Array<{ type?: string; [k: string]: unknown }> }) .content ?? []; for (const block of blocks) { if (block == null || typeof block !== 'object') continue; const blockType = block.type; if (blockType === 'text') { const text = (block as { text?: string }).text ?? ''; const annotations = ( block as { annotations?: Array<GoogleInteractionsAnnotation>; } ).annotations; content.push({ type: 'text', text, ...googleProviderMetadata({ interactionId }), }); const sources = annotationsToSources({ annotations, generateId }); for (const source of sources) { content.push(source); } } else if (blockType === 'image') { const image = block as { data?: string; mime_type?: string; uri?: string; }; if (image.data != null && image.data.length > 0) { content.push({ type: 'file', mediaType: image.mime_type ?? 'image/png', data: image.data, ...googleProviderMetadata({ interactionId }), }); } else if (image.uri != null && image.uri.length > 0) { /* * V3 `LanguageModelV3File` only supports inline data (`string` / * `Uint8Array`). URL-only image outputs cannot be represented as * a file content part on the v3 spec; surface the URI through * provider metadata so callers can still recover it. */ content.push({ type: 'file', mediaType: image.mime_type ?? 'image/png', data: '', providerMetadata: { google: { ...(interactionId != null ? { interactionId } : {}), imageUri: image.uri, }, }, }); } } } break; } case 'thought': { const thought = step as { signature?: string; summary?: Array<{ type: string; text?: string }>; }; const summary = Array.isArray(thought.summary) ? thought.summary : []; const text = summary .filter( item => item?.type === 'text' && typeof item.text === 'string', ) .map(item => item.text as string) .join('\n'); content.push({ type: 'reasoning', text, ...googleProviderMetadata({ signature: thought.signature, interactionId, }), }); break; } case 'function_call': { hasFunctionCall = true; const call = step as { id: string; name: string; arguments?: Record<string, unknown> | null; signature?: string | null; }; content.push({ type: 'tool-call', toolCallId: call.id, toolName: call.name, input: JSON.stringify(call.arguments ?? {}), ...googleProviderMetadata({ signature: call.signature, interactionId, }), }); break; } default: { if (BUILTIN_TOOL_CALL_TYPES.has(type)) { const call = step as { id?: string; arguments?: Record<string, unknown>; name?: string; server_name?: string; }; const toolName = type === 'mcp_server_tool_call' ? (call.name ?? 'mcp_server_tool') : builtinToolNameFromCallType(type); const input = JSON.stringify(call.arguments ?? {}); content.push({ type: 'tool-call', toolCallId: call.id ?? generateId(), toolName, input, providerExecuted: true, }); } else if (BUILTIN_TOOL_RESULT_TYPES.has(type)) { const result = step as { call_id?: string; result?: unknown; is_error?: boolean; name?: string; }; const toolName = type === 'mcp_server_tool_result' ? (result.name ?? 'mcp_server_tool') : builtinToolNameFromResultType(type); content.push({ type: 'tool-result', toolCallId: result.call_id ?? generateId(), toolName, result: (result.result ?? null) as NonNullable<JSONValue>, }); const sources = builtinToolResultToSources({ block: step as unknown as GoogleInteractionsBuiltinToolResultContent, generateId, }); for (const source of sources) { content.push(source); } } break; } } } return { content, hasFunctionCall }; }