@ai-sdk/google
Version:
The **[Google Generative AI provider](https://ai-sdk.dev/providers/ai-sdk-providers/google-generative-ai)** for the [AI SDK](https://ai-sdk.dev/docs) contains language model support for the [Google Generative AI](https://ai.google/discover/generativeai/) APIs.
749 lines (689 loc) • 26.8 kB
text/typescript
import type {
LanguageModelV3,
LanguageModelV3CallOptions,
LanguageModelV3FinishReason,
LanguageModelV3GenerateResult,
LanguageModelV3StreamResult,
SharedV3ProviderMetadata,
SharedV3Warning,
} from '@ai-sdk/provider';
import {
combineHeaders,
createEventSourceResponseHandler,
createJsonResponseHandler,
generateId as defaultGenerateId,
parseProviderOptions,
postJsonToApi,
resolve,
type FetchFunction,
type Resolvable,
} from '@ai-sdk/provider-utils';
import { googleFailedResponseHandler } from '../google-error';
import { buildGoogleInteractionsStreamTransform } from './build-google-interactions-stream-transform';
import { convertGoogleInteractionsUsage } from './convert-google-interactions-usage';
import { convertToGoogleInteractionsInput } from './convert-to-google-interactions-input';
import {
googleInteractionsEventSchema,
googleInteractionsResponseSchema,
} from './google-interactions-api';
import {
googleInteractionsLanguageModelOptions,
type GoogleInteractionsModelId,
} from './google-interactions-language-model-options';
import type {
GoogleInteractionsAgentConfig,
GoogleInteractionsEnvironmentSource,
GoogleInteractionsGenerationConfig,
GoogleInteractionsNetworkAllowlistEntry,
GoogleInteractionsNetworkConfig,
GoogleInteractionsRequestBody,
GoogleInteractionsResponseFormatEntry,
GoogleInteractionsTool,
GoogleInteractionsToolChoice,
} from './google-interactions-prompt';
import { mapGoogleInteractionsFinishReason } from './map-google-interactions-finish-reason';
import { parseGoogleInteractionsOutputs } from './parse-google-interactions-outputs';
import {
isTerminalStatus,
pollGoogleInteractionUntilTerminal,
} from './poll-google-interactions';
import { prepareGoogleInteractionsTools } from './prepare-google-interactions-tools';
import { streamGoogleInteractionEvents } from './stream-google-interactions';
import { synthesizeGoogleInteractionsAgentStream } from './synthesize-google-interactions-agent-stream';
/**
 * Runtime configuration injected into `GoogleInteractionsLanguageModel`.
 */
export type GoogleInteractionsConfig = {
/** Provider identifier; returned verbatim by the model's `provider` getter. */
provider: string;
/** URL prefix; the model posts to `${baseURL}/interactions`. */
baseURL: string;
/**
 * Optional (possibly lazy/async) headers. Resolved on every call and merged
 * with the API revision header and the per-call `options.headers`.
 */
headers?: Resolvable<Record<string, string | undefined>>;
/** Optional custom fetch, forwarded to every HTTP helper the model calls. */
fetch?: FetchFunction;
/**
 * Id generator forwarded to the output parser and the stream transforms.
 * The model falls back to the provider-utils `generateId` when this is
 * nullish at runtime.
 */
generateId: () => string;
/**
 * Optional override for the URL patterns reported by the model's
 * `supportedUrls` getter; when set, its return value replaces the built-in
 * defaults.
 */
supportedUrls?: () => LanguageModelV3['supportedUrls'];
};
/**
 * Target accepted by the `GoogleInteractionsLanguageModel` constructor:
 *
 * - a plain model id string (request is sent with `model:`), or
 * - `{ agent }` / `{ managedAgent }` naming an agent (request is sent with
 *   `agent:`). The constructor treats both object forms identically: the name
 *   becomes both `modelId` and `agent`.
 */
export type GoogleInteractionsModelInput =
| GoogleInteractionsModelId
| { agent: string }
| { managedAgent: string };
/**
 * `LanguageModelV3` implementation that talks to the Google Interactions
 * endpoint (`POST {baseURL}/interactions`).
 *
 * An instance targets either a model id (request body carries `model:`) or an
 * agent (request body carries `agent:`). Options the API does not accept for
 * the chosen target are dropped from the request body and reported through
 * the returned `warnings` array rather than thrown.
 */
export class GoogleInteractionsLanguageModel implements LanguageModelV3 {
  readonly specificationVersion = 'v3';

  /** Model id, or the agent name when the instance targets an agent. */
  readonly modelId: string;

  /**
   * Optional agent name. When provided, the request body sends `agent:` instead
   * of `model:` and rejects `tools` / `generation_config` (warned, not thrown).
   */
  readonly agent: string | undefined;

  private readonly config: GoogleInteractionsConfig;

  /**
   * @param modelOrAgent Model id string, or `{ agent }` / `{ managedAgent }`
   *   naming an agent. Both object forms are handled identically.
   * @param config Provider wiring (base URL, headers, fetch, id generator).
   */
  constructor(
    modelOrAgent: GoogleInteractionsModelInput,
    config: GoogleInteractionsConfig,
  ) {
    if (typeof modelOrAgent === 'string') {
      this.modelId = modelOrAgent;
      this.agent = undefined;
    } else if ('managedAgent' in modelOrAgent) {
      this.modelId = modelOrAgent.managedAgent;
      this.agent = modelOrAgent.managedAgent;
    } else {
      this.modelId = modelOrAgent.agent;
      this.agent = modelOrAgent.agent;
    }
    this.config = config;
  }

  /** Provider identifier taken from the injected config. */
  get provider(): string {
    return this.config.provider;
  }

  /**
   * URL patterns, keyed by media type, that may be passed through as URLs.
   * Uses `config.supportedUrls()` when configured; otherwise returns the
   * built-in defaults below.
   */
  get supportedUrls() {
    if (this.config.supportedUrls) {
      return this.config.supportedUrls();
    }
    return {
      'image/*': [/^https?:\/\/.+/],
      'application/pdf': [/^https?:\/\/.+/],
      'audio/*': [/^https?:\/\/.+/],
      'video/*': [
        /^https?:\/\/(www\.)?youtube\.com\/watch\?v=.+/,
        /^https?:\/\/youtu\.be\/.+/,
        /^gs:\/\/.+/,
      ],
    };
  }

  /**
   * Translates AI SDK call options plus `providerOptions.google` into an
   * Interactions request body.
   *
   * @param options Call options received by `doGenerate` / `doStream`.
   * @returns The request body (`args`), accumulated `warnings`, whether the
   *   call targets an agent, whether `background: true` was requested, and the
   *   optional polling timeout.
   */
  private async getArgs(options: LanguageModelV3CallOptions) {
    const warnings: Array<SharedV3Warning> = [];
    const opts = await parseProviderOptions({
      provider: 'google',
      providerOptions: options.providerOptions,
      schema: googleInteractionsLanguageModelOptions,
    });
    const isAgent = this.agent != null;
    const hasTools = options.tools != null && options.tools.length > 0;
    let toolsForBody: Array<GoogleInteractionsTool> | undefined;
    let toolChoiceForBody: GoogleInteractionsToolChoice | undefined;
    if (hasTools && isAgent) {
      warnings.push({
        type: 'other',
        message:
          'google.interactions: tools are not supported when an agent is set; tools will be omitted from the request body.',
      });
    } else if (hasTools) {
      const prepared = prepareGoogleInteractionsTools({
        tools: options.tools,
        toolChoice: options.toolChoice,
      });
      toolsForBody = prepared.tools;
      toolChoiceForBody = prepared.toolChoice;
      warnings.push(...prepared.toolWarnings);
    }

    /*
     * `response_format` is a polymorphic array of entries. Three sources
     * contribute, in order:
     *
     * 1. AI SDK call-level `responseFormat: { type: 'json', schema }` →
     *    `{ type: 'text', mime_type: 'application/json', schema }`.
     * 2. `providerOptions.google.responseFormat` (primary path) — entries
     *    are appended verbatim with camelCase → snake_case translation.
     * 3. `providerOptions.google.imageConfig` (deprecated fallback) — only
     *    contributes if no `{type:'image'}` entry was already provided via
     *    sources 1 or 2; emits a deprecation warning when used.
     *
     * For agent calls the call-level JSON `responseFormat` (source 1) is
     * dropped with a warning.
     */
    const responseFormatEntries: Array<GoogleInteractionsResponseFormatEntry> =
      [];
    if (options.responseFormat?.type === 'json') {
      if (isAgent) {
        warnings.push({
          type: 'other',
          message:
            'google.interactions: structured output (responseFormat) is not supported when an agent is set; responseFormat will be ignored.',
        });
      } else {
        const entry: GoogleInteractionsResponseFormatEntry = {
          type: 'text',
          mime_type: 'application/json',
          ...(options.responseFormat.schema != null
            ? { schema: options.responseFormat.schema }
            : {}),
        };
        responseFormatEntries.push(entry);
      }
    }
    if (opts?.responseFormat != null) {
      for (const entry of opts.responseFormat) {
        if (entry.type === 'text') {
          responseFormatEntries.push(
            pruneUndefined({
              type: 'text' as const,
              mime_type: entry.mimeType ?? undefined,
              schema: entry.schema ?? undefined,
            }),
          );
        } else if (entry.type === 'image') {
          responseFormatEntries.push(
            pruneUndefined({
              type: 'image' as const,
              mime_type: entry.mimeType ?? undefined,
              aspect_ratio: entry.aspectRatio ?? undefined,
              image_size: entry.imageSize ?? undefined,
            }),
          );
        } else if (entry.type === 'audio') {
          responseFormatEntries.push(
            pruneUndefined({
              type: 'audio' as const,
              mime_type: entry.mimeType ?? undefined,
            }),
          );
        }
      }
    }

    const {
      input,
      systemInstruction: convertedSystemInstruction,
      warnings: convWarnings,
    } = convertToGoogleInteractionsInput({
      prompt: options.prompt,
      previousInteractionId: opts?.previousInteractionId ?? undefined,
      store: opts?.store ?? undefined,
      mediaResolution: opts?.mediaResolution ?? undefined,
    });
    warnings.push(...convWarnings);

    // The AI SDK system message wins over the provider-option fallback.
    let systemInstruction = convertedSystemInstruction;
    const optionSystemInstruction = opts?.systemInstruction ?? undefined;
    if (systemInstruction != null && optionSystemInstruction != null) {
      warnings.push({
        type: 'other',
        message:
          'google.interactions: both AI SDK system message and providerOptions.google.systemInstruction were set; using the AI SDK system message.',
      });
    } else if (systemInstruction == null && optionSystemInstruction != null) {
      systemInstruction = optionSystemInstruction;
    }

    /*
     * The Interactions API splits per-call config into `generation_config`
     * (model branch) and `agent_config` (agent branch); the two are mutually
     * exclusive. The AI SDK call-level generation params and the thinking
     * provider options flow into `generation_config`; `imageConfig` is
     * translated into a `response_format` image entry instead.
     *
     * When an agent is set, none of these fields are accepted by the API.
     * Emit a single warning listing the dropped field names and continue (do
     * not throw); the agent-only `agent_config` field supersedes them.
     */
    let generationConfig: GoogleInteractionsGenerationConfig | undefined;
    if (isAgent) {
      const droppedFields: Array<string> = [];
      if (options.temperature != null) droppedFields.push('temperature');
      if (options.topP != null) droppedFields.push('topP');
      if (options.seed != null) droppedFields.push('seed');
      if (options.stopSequences != null && options.stopSequences.length > 0) {
        droppedFields.push('stopSequences');
      }
      if (options.maxOutputTokens != null)
        droppedFields.push('maxOutputTokens');
      if (opts?.thinkingLevel != null) droppedFields.push('thinkingLevel');
      if (opts?.thinkingSummaries != null) {
        droppedFields.push('thinkingSummaries');
      }
      if (opts?.imageConfig != null) droppedFields.push('imageConfig');
      if (droppedFields.length > 0) {
        warnings.push({
          type: 'other',
          message: `google.interactions: ${droppedFields.join(', ')} ${droppedFields.length === 1 ? 'is' : 'are'} not supported when an agent is set; use providerOptions.google.agentConfig instead. Dropped from the request body.`,
        });
      }
      generationConfig = undefined;
    } else {
      generationConfig = pruneUndefined({
        temperature: options.temperature ?? undefined,
        top_p: options.topP ?? undefined,
        seed: options.seed ?? undefined,
        stop_sequences:
          options.stopSequences != null && options.stopSequences.length > 0
            ? options.stopSequences
            : undefined,
        max_output_tokens: options.maxOutputTokens ?? undefined,
        thinking_level: opts?.thinkingLevel ?? undefined,
        thinking_summaries: opts?.thinkingSummaries ?? undefined,
        tool_choice: toolChoiceForBody,
      });
      /*
       * Deprecated fallback path: `imageConfig` contributes an image entry
       * only when none was supplied via `responseFormat`. A warning is
       * always emitted when `imageConfig` is set so callers migrate to the
       * `responseFormat` shape.
       */
      if (opts?.imageConfig != null) {
        const alreadyHasImageEntry = responseFormatEntries.some(
          entry => entry.type === 'image',
        );
        warnings.push({
          type: 'other',
          message: alreadyHasImageEntry
            ? 'google.interactions: providerOptions.google.imageConfig is deprecated and was ignored because providerOptions.google.responseFormat already supplies an image entry. Use responseFormat exclusively.'
            : 'google.interactions: providerOptions.google.imageConfig is deprecated. Use providerOptions.google.responseFormat with a { type: "image", ... } entry instead.',
        });
        if (!alreadyHasImageEntry) {
          responseFormatEntries.push({
            type: 'image',
            mime_type: 'image/png',
            ...(opts.imageConfig.aspectRatio != null
              ? { aspect_ratio: opts.imageConfig.aspectRatio }
              : {}),
            ...(opts.imageConfig.imageSize != null
              ? { image_size: opts.imageConfig.imageSize }
              : {}),
          });
        }
      }
    }

    /*
     * `agent_config` is only sent for agent calls. On a model-id call it is
     * dropped with a warning, mirroring how `environment` is handled below,
     * so the caller is told the option had no effect.
     */
    let agentConfig: GoogleInteractionsAgentConfig | undefined;
    if (opts?.agentConfig != null) {
      if (!isAgent) {
        warnings.push({
          type: 'other',
          message:
            'google.interactions: agentConfig is only supported when an agent is set; agentConfig will be omitted from the request body.',
        });
      } else {
        const ac = opts.agentConfig;
        if (ac.type === 'deep-research') {
          agentConfig = pruneUndefined({
            type: 'deep-research',
            thinking_summaries: ac.thinkingSummaries ?? undefined,
            visualization: ac.visualization ?? undefined,
            collaborative_planning: ac.collaborativePlanning ?? undefined,
          }) as GoogleInteractionsAgentConfig;
        } else if (ac.type === 'dynamic') {
          agentConfig = { type: 'dynamic' };
        }
      }
    }

    // `environment` is agent-only: either an opaque string passed through
    // unchanged, or a structured object translated to a `type: 'remote'` entry.
    let environment: GoogleInteractionsRequestBody['environment'];
    if (opts?.environment != null) {
      if (!isAgent) {
        warnings.push({
          type: 'other',
          message:
            'google.interactions: environment is only supported when an agent is set; environment will be omitted from the request body.',
        });
      } else if (typeof opts.environment === 'string') {
        environment = opts.environment;
      } else {
        const env = opts.environment;
        const sources: Array<GoogleInteractionsEnvironmentSource> | undefined =
          env.sources?.map(s => {
            if (s.type === 'inline') {
              return {
                type: 'inline' as const,
                content: s.content,
                target: s.target,
              };
            }
            return pruneUndefined({
              type: s.type,
              source: s.source,
              target: s.target ?? undefined,
            }) as GoogleInteractionsEnvironmentSource;
          });
        let network: GoogleInteractionsNetworkConfig | undefined;
        if (env.network === 'disabled') {
          network = 'disabled';
        } else if (env.network != null) {
          network = {
            allowlist: env.network.allowlist.map(entry =>
              pruneUndefined({
                domain: entry.domain,
                transform: entry.transform ?? undefined,
              }),
            ) as Array<GoogleInteractionsNetworkAllowlistEntry>,
          };
        }
        environment = pruneUndefined({
          type: 'remote' as const,
          // An empty sources array is omitted rather than sent as `[]`.
          sources: sources != null && sources.length > 0 ? sources : undefined,
          network,
        });
      }
    }

    /*
     * `background` is opt-in via `providerOptions.google.background`. Some
     * agents require it because their server-side workflow cannot complete
     * within a single request; others reject it. When `background: true`, the
     * POST returns a non-terminal status and the SDK polls
     * `GET /interactions/{id}` until the work completes.
     */
    const args: GoogleInteractionsRequestBody = pruneUndefined({
      ...(isAgent ? { agent: this.agent } : { model: this.modelId }),
      input,
      system_instruction: systemInstruction,
      tools: toolsForBody,
      response_format:
        responseFormatEntries.length > 0 ? responseFormatEntries : undefined,
      response_modalities:
        opts?.responseModalities != null
          ? (opts.responseModalities as Array<
              'text' | 'image' | 'audio' | 'video' | 'document'
            >)
          : undefined,
      previous_interaction_id: opts?.previousInteractionId ?? undefined,
      service_tier: opts?.serviceTier ?? undefined,
      store: opts?.store ?? undefined,
      // An all-undefined generation config is omitted entirely.
      generation_config:
        generationConfig != null && Object.keys(generationConfig).length > 0
          ? generationConfig
          : undefined,
      agent_config: agentConfig,
      environment,
      background: opts?.background ?? undefined,
    });
    return {
      args,
      warnings,
      isAgent,
      isBackground: opts?.background === true,
      pollingTimeoutMs: opts?.pollingTimeoutMs ?? undefined,
    };
  }

  /**
   * Performs a non-streaming call.
   *
   * Posts the request body and, when the call is an agent call or was issued
   * with `background: true` and the POST came back with a non-terminal
   * status, polls the interaction until it reaches a terminal status before
   * building the result.
   *
   * @param options AI SDK call options.
   * @returns Parsed content, finish reason, usage, warnings, provider
   *   metadata and request/response details.
   */
  async doGenerate(
    options: LanguageModelV3CallOptions,
  ): Promise<LanguageModelV3GenerateResult> {
    const { args, warnings, isAgent, isBackground, pollingTimeoutMs } =
      await this.getArgs(options);
    const url = `${this.config.baseURL}/interactions`;
    const mergedHeaders = combineHeaders(
      INTERACTIONS_API_REVISION_HEADER,
      this.config.headers ? await resolve(this.config.headers) : undefined,
      options.headers,
    );
    const postResult = await postJsonToApi({
      url,
      headers: mergedHeaders,
      body: args,
      failedResponseHandler: googleFailedResponseHandler,
      successfulResponseHandler: createJsonResponseHandler(
        googleInteractionsResponseSchema,
      ),
      abortSignal: options.abortSignal,
      fetch: this.config.fetch,
    });
    let {
      responseHeaders,
      value: response,
      rawValue: rawResponse,
    } = postResult;

    /*
     * Agent calls, and any call made with `background: true`, may return a
     * non-terminal status (`in_progress` / `requires_action`). Poll
     * `GET /interactions/{id}` until terminal so the user-facing surface
     * matches a synchronous call. Background model-id calls are included so
     * `doGenerate` agrees with `doStream`, which branches on `background`
     * regardless of whether an agent is set.
     */
    if ((isAgent || isBackground) && !isTerminalStatus(response.status)) {
      const polled = await pollGoogleInteractionUntilTerminal({
        baseURL: this.config.baseURL,
        interactionId: response.id,
        headers: mergedHeaders,
        fetch: this.config.fetch,
        abortSignal: options.abortSignal,
        timeoutMs: pollingTimeoutMs,
      });
      response = polled.response;
      rawResponse = polled.rawResponse;
      responseHeaders = polled.responseHeaders ?? responseHeaders;
    }

    /*
     * `response.id` is omitted when `store: false` (fully stateless mode), and
     * the stream surface returns `id: ""` (empty string) for the same case.
     * Normalize both to `undefined` so downstream stamping does not pollute
     * provider metadata with an empty/missing identifier.
     */
    const interactionId =
      typeof response.id === 'string' && response.id.length > 0
        ? response.id
        : undefined;
    const { content, hasFunctionCall } = parseGoogleInteractionsOutputs({
      steps: response.steps ?? null,
      generateId: this.config.generateId ?? defaultGenerateId,
      interactionId,
    });
    const finishReason: LanguageModelV3FinishReason = {
      unified: mapGoogleInteractionsFinishReason({
        status: response.status,
        hasFunctionCall,
      }),
      raw: response.status,
    };

    /*
     * Service tier divergence vs. `:generateContent`:
     *
     * `google-language-model.ts` reads the applied service tier from the
     * `x-gemini-service-tier` HTTP response header (see commit 1adfb76d2d).
     * The Interactions API does NOT surface that header; it returns the
     * applied tier in the response body as `service_tier` on the top-level
     * Interaction object (and on `interaction.complete.interaction` for
     * streaming). The `responseHeaders` parameter is also checked as a
     * defensive fallback in case the API later adds the header.
     */
    const serviceTier =
      response.service_tier ??
      responseHeaders?.['x-gemini-service-tier'] ??
      undefined;

    // Only keys with an actual value are stamped into provider metadata.
    const providerMetadata: SharedV3ProviderMetadata = {
      google: {
        ...(interactionId != null ? { interactionId } : {}),
        ...(serviceTier != null ? { serviceTier } : {}),
      },
    };

    // `created` is surfaced as a timestamp only when it parses to a valid date.
    let timestamp: Date | undefined;
    if (typeof response.created === 'string') {
      const parsed = new Date(response.created);
      if (!Number.isNaN(parsed.getTime())) {
        timestamp = parsed;
      }
    }
    return {
      content,
      finishReason,
      usage: convertGoogleInteractionsUsage(response.usage),
      warnings,
      providerMetadata,
      request: { body: args },
      response: {
        headers: responseHeaders,
        body: rawResponse,
        ...(interactionId != null ? { id: interactionId } : {}),
        ...(timestamp ? { timestamp } : {}),
        modelId: response.model ?? undefined,
      },
    };
  }

  /**
   * Performs a streaming call.
   *
   * Calls made with `background: true` are delegated to
   * `doStreamBackground`; all others POST with `stream: true` and pipe the
   * SSE events through the Interactions stream transform.
   *
   * @param options AI SDK call options.
   * @returns The transformed stream plus request/response details.
   */
  async doStream(
    options: LanguageModelV3CallOptions,
  ): Promise<LanguageModelV3StreamResult> {
    const { args, warnings, isBackground, pollingTimeoutMs } =
      await this.getArgs(options);
    const url = `${this.config.baseURL}/interactions`;
    const mergedHeaders = combineHeaders(
      INTERACTIONS_API_REVISION_HEADER,
      this.config.headers ? await resolve(this.config.headers) : undefined,
      options.headers,
    );

    /*
     * `background: true` is incompatible with `stream: true` on POST. Drive
     * background calls via POST background -> GET stream (with terminal-status
     * short-circuit). The user-facing stream surface stays identical --
     * text-start / text-delta / text-end / finish parts are emitted in the
     * same order as a true SSE response.
     */
    if (isBackground) {
      return this.doStreamBackground({
        args,
        warnings,
        url,
        mergedHeaders,
        options,
        pollingTimeoutMs,
      });
    }

    const body = { ...args, stream: true };
    const { responseHeaders, value: response } = await postJsonToApi({
      url,
      headers: mergedHeaders,
      body,
      failedResponseHandler: googleFailedResponseHandler,
      successfulResponseHandler: createEventSourceResponseHandler(
        googleInteractionsEventSchema,
      ),
      abortSignal: options.abortSignal,
      fetch: this.config.fetch,
    });

    /*
     * Forward the `x-gemini-service-tier` response header (when present) to
     * the stream transform, mirroring the header-based pattern used by
     * `google-language-model.ts` (commit 1adfb76d2d). `doGenerate` reads the
     * tier from the response body first and treats this header only as a
     * fallback; how the transform combines the header value with
     * body-reported tiers is decided inside
     * `buildGoogleInteractionsStreamTransform`.
     */
    const headerServiceTier = responseHeaders?.['x-gemini-service-tier'];
    const transform = buildGoogleInteractionsStreamTransform({
      warnings,
      generateId: this.config.generateId ?? defaultGenerateId,
      includeRawChunks: options.includeRawChunks,
      serviceTier: headerServiceTier,
    });
    return {
      stream: response.pipeThrough(transform),
      request: { body },
      response: { headers: responseHeaders },
    };
  }

  /*
   * Drive the streaming surface for `background: true` calls (agent or
   * model). `background: true` is incompatible with `stream: true` on POST.
   *
   * Approach:
   * 1. POST `/interactions` with the background request body. The response
   *    includes the interaction id and an initial (usually non-terminal)
   *    status.
   * 2. If the POST status is already terminal (rare), synthesize a stream
   *    directly from the POST response and we're done.
   * 3. Otherwise hand the interaction id to `streamGoogleInteractionEvents`
   *    and pipe its events through `buildGoogleInteractionsStreamTransform`
   *    so the consumer receives text deltas / thinking summaries / tool
   *    events as they happen instead of all at once at the end.
   *
   * The SSE connection can drop while the agent idles between events
   * (`UND_ERR_BODY_TIMEOUT`); `streamGoogleInteractionEvents` handles the
   * reconnect-with-`last_event_id` loop transparently.
   *
   * Throws an `Error` when the POST response carries no interaction id,
   * because there is then nothing to stream from.
   */
  private async doStreamBackground({
    args,
    warnings,
    url,
    mergedHeaders,
    options,
    pollingTimeoutMs,
  }: {
    args: GoogleInteractionsRequestBody;
    warnings: Array<SharedV3Warning>;
    url: string;
    mergedHeaders: Record<string, string | undefined>;
    options: LanguageModelV3CallOptions;
    pollingTimeoutMs: number | undefined;
  }): Promise<LanguageModelV3StreamResult> {
    const postResult = await postJsonToApi({
      url,
      headers: mergedHeaders,
      body: args,
      failedResponseHandler: googleFailedResponseHandler,
      successfulResponseHandler: createJsonResponseHandler(
        googleInteractionsResponseSchema,
      ),
      abortSignal: options.abortSignal,
      fetch: this.config.fetch,
    });
    const { responseHeaders: postHeaders, value: postResponse } = postResult;
    const interactionId = postResponse.id;
    if (interactionId == null || interactionId.length === 0) {
      throw new Error(
        'google.interactions: background POST response did not include an interaction id; cannot stream the result.',
      );
    }
    const headerServiceTier = postHeaders?.['x-gemini-service-tier'];

    /*
     * If the POST already returned a terminal status (e.g. cached, immediate
     * failure, or `incomplete`), there is nothing to stream from the GET --
     * synthesize directly from the response so the caller still gets a
     * complete stream.
     */
    if (isTerminalStatus(postResponse.status)) {
      const synthesized = synthesizeGoogleInteractionsAgentStream({
        response: postResponse,
        warnings,
        generateId: this.config.generateId ?? defaultGenerateId,
        includeRawChunks: options.includeRawChunks,
        headerServiceTier,
      });
      return {
        stream: synthesized,
        request: { body: args },
        response: { headers: postHeaders },
      };
    }

    /*
     * `pollingTimeoutMs` is intentionally unused on the live-SSE path: there
     * is no poll loop to time out, and cancellation is driven solely by the
     * caller's `abortSignal`. The parameter is kept (and explicitly voided)
     * so a future iteration can use it as a backstop if the SSE+resume loop
     * spins indefinitely.
     */
    void pollingTimeoutMs;

    const events = streamGoogleInteractionEvents({
      baseURL: this.config.baseURL,
      interactionId,
      headers: mergedHeaders,
      fetch: this.config.fetch,
      abortSignal: options.abortSignal,
    });
    const transform = buildGoogleInteractionsStreamTransform({
      warnings,
      generateId: this.config.generateId ?? defaultGenerateId,
      includeRawChunks: options.includeRawChunks,
      serviceTier: headerServiceTier,
    });
    return {
      stream: events.pipeThrough(transform),
      request: { body: args },
      response: { headers: postHeaders },
    };
  }
}
/*
 * Pins the Interactions API revision the SDK targets.
 *
 * `doGenerate` and `doStream` pass this object as the first argument to
 * `combineHeaders`, ahead of the configured and per-call headers, and hand
 * the merged result to the polling and SSE helpers as well, so every request
 * issued from this file carries the same revision.
 * NOTE(review): later `combineHeaders` arguments presumably override this
 * value when they set the same header name -- confirm against provider-utils.
 */
const INTERACTIONS_API_REVISION_HEADER: Record<string, string> = {
'Api-Revision': '2026-05-20',
};
/**
 * Returns a shallow copy of `obj` without the own enumerable properties whose
 * value is exactly `undefined`.
 *
 * `null`, `0`, `''` and `false` are kept, nested objects are not inspected,
 * and the input object is left untouched.
 *
 * @param obj Object to copy.
 * @returns A new object holding only the defined properties, in the same
 *   key order as `obj`.
 */
function pruneUndefined<T extends Record<string, unknown>>(obj: T): T {
  const pruned: Record<string, unknown> = {};
  for (const name of Object.keys(obj)) {
    const candidate = obj[name];
    if (candidate !== undefined) {
      pruned[name] = candidate;
    }
  }
  return pruned as T;
}