UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio

github.com/juspay/neurolink

juspay/neurolink

946 lines (945 loc) • 65.3 kB

JavaScript

import {} from "ai"; import { ErrorCategory, ErrorSeverity, GoogleAIModels, } from "../constants/enums.js"; import { BaseProvider } from "../core/baseProvider.js"; import { IMAGE_GENERATION_MODELS } from "../core/constants.js"; import { processUnifiedFilesArray } from "../utils/messageBuilder.js"; import { ATTR, tracers, withClientSpan, withClientStreamSpan, withSpan, } from "../telemetry/index.js"; import { AuthenticationError, InvalidModelError, NetworkError, ProviderError, RateLimitError, } from "../types/index.js"; import { ERROR_CODES, NeuroLinkError } from "../utils/errorHandling.js"; import { logger } from "../utils/logger.js"; import { composeAbortSignals, createTimeoutController, TimeoutError, } from "../utils/timeout.js"; import { estimateTokens } from "../utils/tokenEstimation.js"; import { buildGeminiResponseSchema, buildNativeConfig, buildNativeToolDeclarations, collectStreamChunks, collectStreamChunksIncremental, computeMaxSteps, createTextChannel, buildUserPartsWithMultimodal, executeNativeToolCalls, extractTextFromParts, handleMaxStepsTermination, prependConversationMessages, pushModelResponseToHistory, } from "./googleNativeGemini3.js"; import { createProxyFetch } from "../proxy/proxyFetch.js"; // Google AI Live API types now imported from ../types/providerSpecific.js // Import proper types for multimodal message handling // Create Google GenAI client async function createGoogleGenAIClient(apiKey) { const mod = await import("@google/genai"); const ctor = mod.GoogleGenAI; if (!ctor) { throw new NeuroLinkError({ code: ERROR_CODES.INVALID_CONFIGURATION, message: "@google/genai does not export GoogleGenAI", category: ErrorCategory.CONFIGURATION, severity: ErrorSeverity.CRITICAL, retriable: false, context: { module: "@google/genai", expectedExport: "GoogleGenAI" }, }); } const Ctor = ctor; // Include httpOptions with proxy fetch for corporate network support return new Ctor({ apiKey, httpOptions: { fetch: createProxyFetch(), }, }); } /** * Google AI Studio provider implementation using BaseProvider * Migrated from original GoogleAIStudio class to new factory pattern * * @important Structured Output Limitation * Google Gemini models cannot combine function calling (tools) with structured * output (JSON schema). When using schemas with output.format: "json", you MUST * set disableTools: true. * * Error without disableTools: * "Function calling with a response mime type: 'application/json' is unsupported" * * This is a Google API limitation documented at: * https://ai.google.dev/gemini-api/docs/function-calling * * @example * ```typescript * // ✅ Correct usage with schemas * const provider = new GoogleAIStudioProvider("gemini-2.5-flash"); * const result = await provider.generate({ * input: { text: "Analyze data" }, * schema: MySchema, * output: { format: "json" }, * disableTools: true // Required * }); * ``` * * @note Gemini 3 Pro Preview (November 2025) will support combining tools + schemas * @note "Too many states for serving" errors can occur with complex schemas + tools. * Solution: Simplify schema or use disableTools: true */ export class GoogleAIStudioProvider extends BaseProvider { credentials; constructor(modelName, sdk, credentials) { super(modelName, "google-ai", sdk); this.credentials = credentials; logger.debug("GoogleAIStudioProvider initialized", { model: this.modelName, provider: this.providerName, sdkProvided: !!sdk, }); } // =================== // ABSTRACT METHOD IMPLEMENTATIONS // =================== getProviderName() { return "google-ai"; } getDefaultModel() { return process.env.GOOGLE_AI_MODEL || GoogleAIModels.GEMINI_2_5_FLASH; } /** * AI SDK model instance — no longer used. * All models are routed through native @google/genai SDK directly. */ getAISDKModel() { throw new NeuroLinkError({ code: ERROR_CODES.INVALID_CONFIGURATION, message: "GoogleAIStudioProvider no longer uses @ai-sdk/google. All models use native @google/genai SDK.", category: ErrorCategory.CONFIGURATION, severity: ErrorSeverity.CRITICAL, retriable: false, context: { provider: this.providerName, model: this.modelName }, }); } formatProviderError(error) { if (error instanceof TimeoutError) { return new NetworkError(error.message, this.providerName); } const errorRecord = error; const message = typeof errorRecord?.message === "string" ? errorRecord.message : "Unknown error"; const statusCode = typeof errorRecord?.status === "number" ? errorRecord.status : typeof errorRecord?.statusCode === "number" ? errorRecord.statusCode : undefined; // Authentication errors if (message.includes("API_KEY_INVALID") || message.includes("Invalid API key") || statusCode === 401) { return new AuthenticationError("Invalid Google AI API key. Please check your GOOGLE_AI_API_KEY environment variable.", this.providerName); } // Rate limit errors if (message.includes("RATE_LIMIT_EXCEEDED") || message.includes("rate limit") || message.includes("429") || statusCode === 429) { return new RateLimitError("Google AI rate limit exceeded. Please try again later.", this.providerName); } // Model not found errors — gate on a 404 status when available; fall // back to literal phrase matching only when we have no status code at // all. Avoids misclassifying permission/validation errors that happen // to mention model resource paths (e.g. "...models/foo permission..."). if (statusCode === 404 || (statusCode === undefined && (message.includes("model not found") || message.includes("Model not found")))) { return new InvalidModelError(`Model '${this.modelName}' not found. Please check the model name and ensure it is available.`, this.providerName); } // Network connectivity errors if (message.includes("ECONNRESET") || message.includes("ENOTFOUND") || message.includes("ETIMEDOUT") || message.includes("ECONNREFUSED") || message.includes("network") || message.includes("connection")) { return new NetworkError(`Connection error: ${message}`, this.providerName); } // Server errors (5xx) if (message.includes("500") || message.includes("502") || message.includes("503") || message.includes("504") || message.includes("server error") || message.includes("Internal Server Error") || (statusCode && statusCode >= 500 && statusCode < 600)) { return new ProviderError(`Google AI server error: ${message}. Please try again later.`, this.providerName); } return new ProviderError(`Google AI error: ${message}`, this.providerName); } /** * Overrides the BaseProvider's image generation method to implement it for Google AI. * This method calls the Google AI API to generate an image from a prompt. * @param options The generation options containing the prompt. * @returns A promise that resolves to the generation result, including the image data. */ async executeImageGeneration(options) { const prompt = options.prompt || options.input?.text || ""; const imageModelName = options.model || this.modelName; const startTime = Date.now(); const apiKey = this.getApiKey(); logger.info("🎨 Starting Google AI Studio image generation", { model: imageModelName, prompt: prompt.substring(0, 100), provider: this.providerName, }); // Use the @google/genai client for image generation let client; try { client = await createGoogleGenAIClient(apiKey); } catch { throw new AuthenticationError("Missing '@google/genai'. Install with: npm install @google/genai", this.providerName); } try { // Build content array with multimodal support const imageParts = await Promise.all((options.input?.images || []).map(async (image) => { // Handle ImageWithAltText objects if (typeof image === "object" && "url" in image) { const imageUrl = image.url; if (imageUrl.startsWith("http")) { const response = await fetch(imageUrl); if (!response.ok) { throw new Error(`Failed to fetch image from ${imageUrl}: ${response.status} ${response.statusText}`); } const arrayBuffer = await response.arrayBuffer(); const buffer = Buffer.from(arrayBuffer); const mimeType = this.detectImageType(buffer); logger.debug(`Downloaded and detected image MIME type: ${mimeType}`); return { inlineData: { mimeType, data: buffer.toString("base64"), }, }; } // Base64 URL in ImageWithAltText const buffer = Buffer.from(imageUrl, "base64"); const mimeType = this.detectImageType(buffer); return { inlineData: { mimeType, data: buffer.toString("base64"), }, }; } // Handle string URLs if (typeof image === "string" && image.startsWith("http")) { const response = await fetch(image); if (!response.ok) { throw new Error(`Failed to fetch image from ${image}: ${response.status} ${response.statusText}`); } const arrayBuffer = await response.arrayBuffer(); const buffer = Buffer.from(arrayBuffer); const mimeType = this.detectImageType(buffer); logger.debug(`Downloaded and detected image MIME type: ${mimeType}`); return { inlineData: { mimeType, data: buffer.toString("base64"), }, }; } // Handle Buffer or base64 string const buffer = Buffer.isBuffer(image) ? image : typeof image === "string" ? Buffer.from(image, "base64") : Buffer.from(""); // Fallback for unexpected types const mimeType = this.detectImageType(buffer); logger.debug(`Detected image MIME type: ${mimeType}`); return { inlineData: { mimeType, data: buffer.toString("base64"), }, }; })); const contents = [ { role: "user", parts: [{ text: prompt }, ...imageParts], }, ]; // Configure for image generation const generateConfig = { responseModalities: ["IMAGE", "TEXT"], // This is the key setting for image generation }; logger.debug("Starting image generation request", { model: imageModelName, contentParts: contents[0].parts.length, responseModalities: generateConfig.responseModalities, }); // Try streaming approach first let imageData = null; let textContent = ""; try { // Await the Promise to get the AsyncIterable const stream = await client.models.generateContentStream({ model: imageModelName, contents: contents, config: generateConfig, }); // Process the stream for await (const chunk of stream) { logger.debug("Received chunk", { hasCandidate: !!chunk.candidates?.[0], hasContent: !!chunk.candidates?.[0]?.content, hasParts: !!chunk.candidates?.[0]?.content?.parts, }); const candidate = chunk.candidates?.[0]; if (candidate?.content?.parts) { for (const part of candidate.content.parts) { // Check for image data if ("inlineData" in part && part.inlineData?.data) { const foundImageData = part.inlineData.data; imageData = foundImageData; const mimeType = part.inlineData.mimeType || "image/png"; logger.info("Image generation successful", { model: imageModelName, mimeType, dataLength: foundImageData.length, responseTime: Date.now() - startTime, }); const result = { content: `Generated image using ${imageModelName} (${mimeType})`, imageOutput: { base64: foundImageData, }, provider: this.providerName, model: imageModelName, usage: { input: this.estimateTokenCount(prompt), output: 0, total: this.estimateTokenCount(prompt), }, }; return await this.enhanceResult(result, options, startTime); } // Check for text content if ("text" in part && part.text) { textContent += part.text; logger.debug("Received text content", { text: part.text.substring(0, 100), }); } } } } } catch (streamError) { logger.debug("Streaming failed, trying non-streaming approach", { error: streamError instanceof Error ? streamError.message : String(streamError), }); } // If no image was found, try non-streaming approach if (!imageData) { logger.debug("Trying non-streaming approach"); const response = await client.models.generateContent({ model: imageModelName, contents: contents, config: generateConfig, }); const candidate = response.candidates?.[0]; if (candidate?.content?.parts) { for (const part of candidate.content.parts) { if ("inlineData" in part && part.inlineData?.data) { const foundImageData = part.inlineData.data; imageData = foundImageData; const mimeType = part.inlineData.mimeType || "image/png"; logger.info("Image generation successful (non-streaming)", { model: imageModelName, mimeType, dataLength: foundImageData.length, responseTime: Date.now() - startTime, }); const result = { content: `Generated image using ${imageModelName} (${mimeType})`, imageOutput: { base64: foundImageData, }, provider: this.providerName, model: imageModelName, usage: { input: this.estimateTokenCount(prompt), output: 0, total: this.estimateTokenCount(prompt), }, }; return await this.enhanceResult(result, options, startTime); } if ("text" in part && part.text) { textContent += part.text; } } } } // If we reach here, no image was generated logger.warn("No image data found in response", { model: imageModelName, prompt: prompt.substring(0, 100), hasTextContent: !!textContent, textContent: textContent.substring(0, 200), }); throw new ProviderError(textContent || `Image generation completed but no image data was returned. This may indicate an issue with the model "${imageModelName}" or the prompt: "${prompt}". Please try again or use a different model.`, this.providerName); } catch (error) { logger.error("Image generation failed", { error: error instanceof Error ? error.message : String(error), model: imageModelName, prompt: prompt.substring(0, 100), }); throw this.handleProviderError(error); } } /** * Detect image MIME type from buffer */ detectImageType(buffer) { // Check PNG signature if (buffer.length >= 8 && buffer[0] === 0x89 && buffer[1] === 0x50 && buffer[2] === 0x4e && buffer[3] === 0x47) { return "image/png"; } // Check JPEG signature if (buffer.length >= 3 && buffer[0] === 0xff && buffer[1] === 0xd8 && buffer[2] === 0xff) { return "image/jpeg"; } // Check WebP signature if (buffer.length >= 12 && buffer[0] === 0x52 && buffer[1] === 0x49 && buffer[2] === 0x46 && buffer[3] === 0x46 && buffer[8] === 0x57 && buffer[9] === 0x45 && buffer[10] === 0x42 && buffer[11] === 0x50) { return "image/webp"; } // Check GIF signature if (buffer.length >= 6 && buffer[0] === 0x47 && buffer[1] === 0x49 && buffer[2] === 0x46) { return "image/gif"; } // Default to PNG if unknown return "image/png"; } /** * Estimate token count from text using centralized estimation with provider multipliers */ estimateTokenCount(text) { return estimateTokens(text, "google-ai"); } // executeGenerate removed - BaseProvider handles all generation with tools async executeStream(options, analysisSchema) { const modelName = options.model || this.modelName; // Phase 1: if audio input present, bridge to Gemini Live (Studio) using @google/genai if (options.input?.audio) { return await this.executeAudioStreamViaGeminiLive(options); } // Structured output (analysisSchema, JSON format, or schema) is incompatible with tools on Gemini. const wantsStructuredOutput = analysisSchema || options.output?.format === "json" || options.schema; // Tool filter (a0269210): trust options.tools — caller (BaseProvider.stream) // already merged MCP/built-in tools with user tools and applied any // enabledToolNames filter. Re-attaching getAllTools() here would clobber // that filter and re-introduce filtered-out tools. const shouldUseTools = !options.disableTools && this.supportsTools() && !wantsStructuredOutput; const optionTools = options.tools || {}; // Merge into options for native SDK path let mergedOptions = { ...options, tools: optionTools, }; // Check for tools + JSON schema conflict (Gemini limitation) const wantsJsonOutput = options.output?.format === "json" || options.schema; if (wantsJsonOutput && mergedOptions.tools && Object.keys(mergedOptions.tools).length > 0 && !mergedOptions.disableTools) { logger.warn("[GoogleAIStudio] Gemini does not support tools and JSON schema output simultaneously. Disabling tools for this request."); mergedOptions = { ...mergedOptions, disableTools: true, tools: {} }; } const hasActiveTools = shouldUseTools && !mergedOptions.disableTools && mergedOptions.tools && Object.keys(mergedOptions.tools).length > 0; if (hasActiveTools) { logger.info("[GoogleAIStudio] Routing to native @google/genai SDK for tool calling", { model: modelName, totalToolCount: Object.keys(mergedOptions.tools ?? {}).length, }); } // Route ALL models through native @google/genai SDK (no more @ai-sdk/google dependency) return this.executeNativeGemini3Stream(mergedOptions); } /** * Execute stream using native @google/genai SDK * Uses @google/genai directly for all Gemini models (2.0, 2.5, 3.x) */ async executeNativeGemini3Stream(options) { const modelName = options.model || this.modelName; return withClientStreamSpan({ name: "neurolink.provider.stream", tracer: tracers.provider, attributes: { [ATTR.GEN_AI_SYSTEM]: "google-ai", [ATTR.GEN_AI_MODEL]: modelName, [ATTR.GEN_AI_OPERATION]: "stream", [ATTR.NL_PROVIDER]: this.providerName, }, }, async (span) => { const startTime = Date.now(); const timeout = this.getTimeout(options); const timeoutController = createTimeoutController(timeout, this.providerName, "stream"); try { const apiKey = this.getApiKey(); const client = await createGoogleGenAIClient(apiKey); logger.debug("[GoogleAIStudio] Using native @google/genai for Gemini 3", { model: modelName, hasTools: !!options.tools && Object.keys(options.tools).length > 0, }); // Build contents from input. Prepend prior conversation turns so // multi-turn callers (memory, loop REPL, agent flows) actually // carry context — the previous build started fresh from the // current user input only, which silently dropped history. // // `buildUserPartsWithMultimodal` is the shared helper that also // attaches `input.images` and `input.pdfFiles` as `inlineData` // parts. The previous AI Studio path pushed only `{ text }` and // silently dropped both, which is why the model legitimately // reported "no image attached" on multimodal calls. const currentContents = []; prependConversationMessages(currentContents, options.conversationMessages); const userParts = await buildUserPartsWithMultimodal(options.input, options.input.text, "[GoogleAIStudio:stream]"); currentContents.push({ role: "user", parts: userParts, }); // Convert tools let toolsConfig; let executeMap = new Map(); let originalNameMap = new Map(); if (options.tools && Object.keys(options.tools).length > 0 && !options.disableTools) { const result = buildNativeToolDeclarations(options.tools); toolsConfig = result.toolsConfig; executeMap = result.executeMap; originalNameMap = result.originalNameMap; logger.debug("[GoogleAIStudio] Converted tools for native SDK", { toolCount: toolsConfig[0].functionDeclarations.length, toolNames: toolsConfig[0].functionDeclarations.map((t) => t.name), }); } // Native JSON / schema enforcement: when no tools are being sent // (the AI Studio orchestrator above already force-disables tools // whenever JSON/schema output is requested), enforce the response // shape natively via responseMimeType / responseSchema. Without // this, JSON output was best-effort prompting only. const wantsNativeJson = !toolsConfig && (options.output?.format === "json" || !!options.schema); const nativeResponseSchema = wantsNativeJson && options.schema ? buildGeminiResponseSchema(options.schema) : undefined; const config = buildNativeConfig({ ...options, wantsJsonOutput: wantsNativeJson, responseSchema: nativeResponseSchema, }, toolsConfig); const maxSteps = computeMaxSteps(options.maxSteps); // Compose abort signal from user signal + timeout const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal); // Create a push-based text channel so the caller receives tokens as // they arrive from the network rather than after full buffering. const channel = createTextChannel(); // Shared mutable state updated by the background agentic loop. const allToolCalls = []; // analyticsResolvers lets the background loop settle the analytics // promise once token counts are known (after the loop completes). let analyticsResolve; let analyticsReject; const analyticsPromise = new Promise((res, rej) => { analyticsResolve = res; analyticsReject = rej; }); // Shared metadata object mutated by the background loop so the // returned object reflects the final values after stream completion. const metadata = { streamId: `native-${Date.now()}`, startTime, responseTime: 0, totalToolExecutions: 0, }; // Run the agentic loop in the background without awaiting it here, // so we can return the StreamResult (with channel.iterable) immediately. const loopPromise = (async () => { let lastStepText = ""; let totalInputTokens = 0; let totalOutputTokens = 0; let step = 0; let completedWithFinalAnswer = false; const failedTools = new Map(); try { // Agentic loop for tool calling while (step < maxSteps) { if (composedSignal?.aborted) { throw composedSignal.reason instanceof Error ? composedSignal.reason : new Error("Request aborted"); } step++; logger.debug(`[GoogleAIStudio] Native SDK step ${step}/${maxSteps}`); try { const rawStream = await client.models.generateContentStream({ model: modelName, contents: currentContents, config, ...(composedSignal ? { httpOptions: { signal: composedSignal } } : {}), }); // For every step, use incremental collection so text parts // are pushed to the channel as they arrive. For intermediate // steps (those that produce function calls) we still need the // complete rawResponseParts for pushModelResponseToHistory, // which collectStreamChunksIncremental provides at stream end. const chunkResult = await collectStreamChunksIncremental(rawStream, channel); totalInputTokens += chunkResult.inputTokens; totalOutputTokens += chunkResult.outputTokens; const stepText = extractTextFromParts(chunkResult.rawResponseParts); // If no function calls, this was the final step — channel // already received all text parts incrementally. if (chunkResult.stepFunctionCalls.length === 0) { completedWithFinalAnswer = true; break; } lastStepText = stepText; // Record tool call events on the span for (const fc of chunkResult.stepFunctionCalls) { span.addEvent("gen_ai.tool_call", { "tool.name": fc.name, "tool.step": step, }); } logger.debug(`[GoogleAIStudio] Executing ${chunkResult.stepFunctionCalls.length} function calls`); // Add model response with ALL parts (including thoughtSignature) to history pushModelResponseToHistory(currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls); const functionResponses = await executeNativeToolCalls("[GoogleAIStudio]", chunkResult.stepFunctionCalls, executeMap, failedTools, allToolCalls, { abortSignal: composedSignal, originalNameMap }); // Add function responses to history — the @google/genai SDK // only accepts "user" and "model" as valid roles in contents. // Function/tool responses must use role: "user" (matching the // SDK's own automaticFunctionCalling implementation). currentContents.push({ role: "user", parts: functionResponses, }); } catch (error) { logger.error("[GoogleAIStudio] Native SDK error", error); throw this.handleProviderError(error); } } // Handle max-steps termination: if the model was still calling // tools when we hit the limit, push a synthetic final message. const hitStepLimitWithoutFinalAnswer = step >= maxSteps && !completedWithFinalAnswer; if (hitStepLimitWithoutFinalAnswer) { const fallback = handleMaxStepsTermination("[GoogleAIStudio]", step, maxSteps, "", // finalText is empty — model didn't stop on its own lastStepText); if (fallback) { channel.push(fallback); } } const responseTime = Date.now() - startTime; // Update shared metadata so the returned object reflects final values. metadata.responseTime = responseTime; metadata.totalToolExecutions = allToolCalls.length; // Set token usage and finish reason on the span span.setAttribute(ATTR.GEN_AI_INPUT_TOKENS, totalInputTokens); span.setAttribute(ATTR.GEN_AI_OUTPUT_TOKENS, totalOutputTokens); span.setAttribute(ATTR.GEN_AI_FINISH_REASON, hitStepLimitWithoutFinalAnswer ? "max_steps" : "stop"); analyticsResolve({ provider: this.providerName, model: modelName, tokenUsage: { input: totalInputTokens, output: totalOutputTokens, total: totalInputTokens + totalOutputTokens, }, requestDuration: responseTime, timestamp: new Date().toISOString(), }); channel.close(); } catch (err) { channel.error(err); analyticsReject(err); } finally { timeoutController?.cleanup(); } })(); // Suppress unhandled-rejection warnings on loopPromise — errors are // forwarded to the channel and will surface when the caller iterates. loopPromise.catch(() => undefined); return { stream: channel.iterable, provider: this.providerName, model: modelName, toolCalls: allToolCalls, analytics: analyticsPromise, metadata, }; } finally { // Timeout controller cleanup is managed inside the background loop } }, (r) => r.stream, (r, wrapped) => ({ ...r, stream: wrapped })); } /** * Execute generate using native @google/genai SDK for Gemini 3 models * This bypasses @ai-sdk/google to properly handle thought_signature */ async executeNativeGemini3Generate(options) { const modelName = options.model || this.modelName; return withClientSpan({ name: "neurolink.provider.generate", tracer: tracers.provider, attributes: { [ATTR.GEN_AI_SYSTEM]: "google-ai", [ATTR.GEN_AI_MODEL]: modelName, [ATTR.GEN_AI_OPERATION]: "generate", [ATTR.NL_PROVIDER]: this.providerName, }, }, async (span) => { const startTime = Date.now(); const timeout = this.getTimeout(options); const timeoutController = createTimeoutController(timeout, this.providerName, "generate"); try { const apiKey = this.getApiKey(); const client = await createGoogleGenAIClient(apiKey); logger.debug("[GoogleAIStudio] Using native @google/genai for Gemini 3 generate", { model: modelName, hasTools: !!options.tools && Object.keys(options.tools).length > 0, }); // Build contents from input // Prefer input.text over prompt — processCSVFilesForNativeSDK enriches // input.text with inlined CSV data, so using prompt first would discard it. const promptText = options.input?.text || options.prompt || ""; // Prepend prior conversation turns so multi-turn generate calls // see history; otherwise the native generate path silently drops // every turn before the current prompt. // // `buildUserPartsWithMultimodal` also attaches inline image / PDF // parts. Without it the request body was text-only and the model // legitimately reported "no image / PDF attached". const currentContents = []; prependConversationMessages(currentContents, options.conversationMessages); const userParts = await buildUserPartsWithMultimodal(options.input, promptText, "[GoogleAIStudio:generate]"); currentContents.push({ role: "user", parts: userParts, }); // Convert tools (a0269210: trust options.tools — already merged + filtered upstream) let toolsConfig; let executeMap = new Map(); let originalNameMap = new Map(); const shouldUseTools = !options.disableTools; if (shouldUseTools) { const tools = options.tools || {}; if (Object.keys(tools).length > 0) { const result = buildNativeToolDeclarations(tools); toolsConfig = result.toolsConfig; executeMap = result.executeMap; originalNameMap = result.originalNameMap; logger.debug("[GoogleAIStudio] Converted tools for native SDK generate", { toolCount: toolsConfig[0].functionDeclarations.length, toolNames: toolsConfig[0].functionDeclarations.map((t) => t.name), }); } } // Native JSON / schema enforcement (generate path). Mirrors the // stream block above; only set when no tools are being sent // because Gemini cannot combine function calling with JSON mime. const wantsNativeJson = !toolsConfig && (options.output?.format === "json" || !!options.schema); const nativeResponseSchema = wantsNativeJson && options.schema ? buildGeminiResponseSchema(options.schema) : undefined; const config = buildNativeConfig({ ...options, wantsJsonOutput: wantsNativeJson, responseSchema: nativeResponseSchema, }, toolsConfig); const composedSignal = composeAbortSignals(options.abortSignal, timeoutController?.controller.signal); const maxSteps = computeMaxSteps(options.maxSteps); let finalText = ""; let lastStepText = ""; let totalInputTokens = 0; let totalOutputTokens = 0; const allToolCalls = []; const toolExecutions = []; let step = 0; const failedTools = new Map(); // Agentic loop for tool calling while (step < maxSteps) { if (composedSignal?.aborted) { throw composedSignal.reason instanceof Error ? composedSignal.reason : new Error("Request aborted"); } step++; logger.debug(`[GoogleAIStudio] Native SDK generate step ${step}/${maxSteps}`); try { const stream = await client.models.generateContentStream({ model: modelName, contents: currentContents, config, ...(composedSignal ? { httpOptions: { signal: composedSignal } } : {}), }); const chunkResult = await collectStreamChunks(stream); totalInputTokens += chunkResult.inputTokens; totalOutputTokens += chunkResult.outputTokens; const stepText = extractTextFromParts(chunkResult.rawResponseParts); // If no function calls, we're done if (chunkResult.stepFunctionCalls.length === 0) { finalText = stepText; break; } lastStepText = stepText; // Record tool call events on the span for (const fc of chunkResult.stepFunctionCalls) { span.addEvent("gen_ai.tool_call", { "tool.name": fc.name, "tool.step": step, }); } logger.debug(`[GoogleAIStudio] Executing ${chunkResult.stepFunctionCalls.length} function calls in generate`); // Add model response with ALL parts (including thoughtSignature) to history // This is critical for Gemini 3 - it requires thought signatures in subsequent turns pushModelResponseToHistory(currentContents, chunkResult.rawResponseParts, chunkResult.stepFunctionCalls); const functionResponses = await executeNativeToolCalls("[GoogleAIStudio]", chunkResult.stepFunctionCalls, executeMap, failedTools, allToolCalls, { toolExecutions, abortSignal: composedSignal, originalNameMap, }); // Add function responses to history — the @google/genai SDK // only accepts "user" and "model" as valid roles in contents. // Function/tool responses must use role: "user" (matching the // SDK's own automaticFunctionCalling implementation). currentContents.push({ role: "user", parts: functionResponses, }); } catch (error) { logger.error("[GoogleAIStudio] Native SDK generate error", error); throw this.handleProviderError(error); } } finalText = handleMaxStepsTermination("[GoogleAIStudio]", step, maxSteps, finalText, lastStepText); const responseTime = Date.now() - startTime; // Set token usage and finish reason on the span span.setAttribute(ATTR.GEN_AI_INPUT_TOKENS, totalInputTokens); span.setAttribute(ATTR.GEN_AI_OUTPUT_TOKENS, totalOutputTokens); span.setAttribute(ATTR.GEN_AI_FINISH_REASON, step >= maxSteps ? "max_steps" : "stop"); // Build EnhancedGenerateResult and route through enhanceResult so // analytics / evaluation / tracing stay attached. The native AI // Studio generate path bypasses BaseProvider.generate(), so // skipping enhanceResult would silently drop those features. const baseResult = { content: finalText, provider: this.providerName, model: modelName, usage: { input: totalInputTokens, output: totalOutputTokens, total: totalInputTokens + totalOutputTokens, }, responseTime, toolsUsed: allToolCalls.map((tc) => tc.toolName), toolExecutions: toolExecutions, enhancedWithTools: allToolCalls.length > 0, }; return this.enhanceResult(baseResult, options, startTime); } finally { timeoutController?.cleanup(); } }); } /** * Override generate to route Gemini 3 models with tools to native SDK */ async generate(optionsOrPrompt) { // Normalize options const options = typeof optionsOrPrompt === "string" ? { prompt: optionsOrPrompt } : optionsOrPrompt; const modelName = options.model || this.modelName; // Image-generation models reject function-calling. Route them to // executeImageGeneration without merging tools. This must happen // BEFORE getToolsForStream to avoid leaking registered (MCP / built-in) // tools into the image API request, which trips // "Function calling is not enabled for this model". // startsWith (not includes) so a hypothetical text model whose ID // contains an image-model string as a substring isn't silently routed // to executeImageGeneration and stripped of tool support. const isImageModel = IMAGE_GENERATION_MODELS.some((m) => modelName.toLowerCase().startsWith(m.toLowerCase())); if (isImageModel) { logger.info("[GoogleAIStudio] Routing image generation model to executeImageGeneration", { model: modelName }); return this.executeImageGeneration(options); } // TTS direct-synthesis mode: synthesise the input text directly (no LLM // call). BaseProvider.runGenerateInActiveContext does the same dispatch // — replicated here because AI Studio's override bypasses that path. if (options.tts?.enabled && !options.tts?.useAiResponse) { logger.info("[GoogleAIStudio] Routing TTS direct-synthesis to handleDirectTTSSynthesis", { model: modelName }); return this.handleDirectTTSSynthesis(options, Date.now()); } // Process the unified `input.files` array before routing to the // native SDK. BaseProvider.generate() runs this preprocessing via // buildMultimodalMessagesArray, but AI Studio's override skips it, // which would otherwise drop text-file content (and the // mimetype-hint contract) on the floor. Mutates options.input.text / // options.input.images / options.input.pdfFiles in place. if (options.input?.files && options.input.files.length > 0) { try { await processUnifiedFilesArray(options, 100 * 1024 * 1024, this.providerName); } catch (fileError) { logger.warn(`[GoogleAIStudio] processUnifiedFilesArray threw, continuing without file content: ${fileError instanceof Error ? fileError.message : String(fileError)}`); } } // Merge registered (built-in / MCP) tools with caller-supplied tools. // AI Studio's generate() bypasses BaseProvider.generate(), so the // ToolsManager-driven merge that normally injects sdk.registerTool() // entries never runs here. Without this call, registered tools never // reach the native function-calling path. const baseTools = !options.disableTools ? await this.getToolsForStream(options) : {}; let mergedOptions = { ...options, tools: baseTools, }; // Check for tools + JSON schema conflict (Gemini limitation) const wantsJsonOutput = options.output?.format === "json" || options.schema; if (wantsJsonOutput && mergedOptions.tools && Object.keys(mergedOptions.tools).length > 0 && !mergedOptions.disableTools) { logger.warn("[GoogleAIStudio] Gemini does not support tools and JSON schema output simultaneously. Disabling tools for this request."); mergedOptions = { ...mergedOptions, disableTools: true, tools: {} }; } const hasActiveTools = !mergedOptions.disableTools && mergedOptions.tools && Object.keys(mergedOptions.tools).length > 0; if (hasActiveTools) { logger.info("[GoogleAIStudio] Routing generate to native @google/genai SDK for tool calling", { model: modelName, totalToolCount: Object.keys(mergedOptions.tools ?? {}).length,