@aj-archipelago/cortex

Cortex is a GraphQL API for AI. It provides a simple, extensible interface for using AI services from OpenAI, Azure, and others.

import Gemini3ImagePlugin from './gemini3ImagePlugin.js';
import CortexResponse from '../../lib/cortexResponse.js';
import logger from '../../lib/logger.js';

class Gemini3ReasoningVisionPlugin extends Gemini3ImagePlugin {
    constructor(pathway, model) {
        super(pathway, model);
    }

    // Override to include thoughtSignature - required by Gemini 3 for function call history
    buildFunctionCallPart(toolCall, args) {
        const part = {
            functionCall: {
                name: toolCall.function.name,
                args: args
            }
        };
        // Include thoughtSignature if present - required by Gemini 3+ models
        // If thoughtSignature was lost (e.g., during history persistence), use the documented
        // dummy value to skip validation - see https://ai.google.dev/gemini-api/docs/thought-signatures
        if (toolCall.thoughtSignature) {
            part.thoughtSignature = toolCall.thoughtSignature;
        } else {
            // Fallback: use documented dummy signature to prevent 400 errors
            // This allows the request to proceed but may affect reasoning quality
            part.thoughtSignature = "skip_thought_signature_validator";
        }
        return part;
    }

    // Override to capture thoughtSignature from Gemini 3 functionCall responses
    buildToolCallFromFunctionCall(part) {
        const toolCall = {
            id: part.functionCall.name + '_' + Date.now(),
            type: "function",
            function: {
                name: part.functionCall.name,
                arguments: JSON.stringify(part.functionCall.args || {})
            }
        };
        // Preserve thoughtSignature for function call history
        if (part.functionCall.thoughtSignature || part.thoughtSignature) {
            toolCall.thoughtSignature = part.functionCall.thoughtSignature || part.thoughtSignature;
        }
        return toolCall;
    }

    // Override getRequestParameters to add Gemini 3 specific handling
    getRequestParameters(text, parameters, prompt, cortexRequest) {
        // Get original messages from getCompiledPrompt before they're converted to Gemini format
        const { modelPromptMessages } = this.getCompiledPrompt(text, parameters, prompt);
        const messages = modelPromptMessages || [];

        const baseParameters = super.getRequestParameters(text, parameters, prompt, cortexRequest);

        // Transform contents for Gemini 3 format:
        // 1. Function responses: role 'function' -> 'user', response.content -> response.output
        // 2. Add functionCall messages for assistant tool_calls (with thoughtSignature)
        if (baseParameters.contents && Array.isArray(baseParameters.contents)) {
            const newContents = [];
            // Build a map of tool response indices to find where to insert functionCall messages
            // The pattern is: assistant+tool_calls message followed by tool response messages
            let lastFunctionResponseIndex = -1;

            for (let i = 0; i < baseParameters.contents.length; i++) {
                const content = baseParameters.contents[i];

                // Check if we need to insert a functionCall message before this function response
                if (content.role === 'function' && content.parts?.[0]?.functionResponse) {
                    // Look for the preceding assistant message with tool_calls in the original messages
                    // that corresponds to this function response
                    const functionName = content.parts[0].functionResponse.name;

                    // Find matching assistant message with this tool call
                    for (const message of messages) {
                        if (message.role === 'assistant' && message.tool_calls?.length > 0) {
                            const hasMatchingToolCall = message.tool_calls.some(tc =>
                                tc.function?.name === functionName ||
                                tc.id?.startsWith(functionName + '_')
                            );
                            if (hasMatchingToolCall && lastFunctionResponseIndex < i) {
                                // Build functionCall message with thoughtSignature
                                const parts = [];
                                if (message.content && typeof message.content === 'string' && message.content.trim()) {
                                    parts.push({ text: message.content });
                                }
                                for (const toolCall of message.tool_calls) {
                                    if (toolCall.function?.name) {
                                        let args = {};
                                        try {
                                            args = typeof toolCall.function.arguments === 'string'
                                                ? JSON.parse(toolCall.function.arguments)
                                                : (toolCall.function.arguments || {});
                                        } catch (e) {
                                            args = {};
                                        }
                                        parts.push(this.buildFunctionCallPart(toolCall, args));
                                    }
                                }
                                if (parts.length > 0) {
                                    newContents.push({
                                        role: 'model',
                                        parts: parts
                                    });
                                }
                                lastFunctionResponseIndex = i;
                                break;
                            }
                        }
                    }

                    // Transform function response: role 'function' -> 'user', content -> output
                    const fr = content.parts[0].functionResponse;
                    let responseData = fr.response;
                    if (responseData?.content !== undefined) {
                        const contentStr = responseData.content;
                        try {
                            responseData = JSON.parse(contentStr);
                        } catch (e) {
                            responseData = { output: contentStr };
                        }
                    }
                    newContents.push({
                        role: 'user',
                        parts: [{
                            functionResponse: {
                                name: fr.name,
                                response: responseData
                            }
                        }]
                    });
                } else {
                    newContents.push(content);
                }
            }
            baseParameters.contents = newContents;
        }

        // Add Gemini 3 thinking support
        // Gemini 3 uses thinkingLevel: 'low' or 'high' (instead of thinkingBudget)
        // includeThoughts: true to get thought summaries in response
        let thinkingLevel = parameters?.thinkingLevel ?? parameters?.thinking_level;
        let includeThoughts = parameters?.includeThoughts ?? parameters?.include_thoughts ?? false;

        // Convert OpenAI reasoningEffort to Gemini 3 thinkingLevel
        // OpenAI supports: 'high', 'medium', 'low', 'none'
        // Gemini 3 supports: 'high' or 'low' (thinking cannot be disabled)
        // Mapping: 'high' or 'medium' → 'high', 'low' or 'minimal' → 'low', 'none' → 'low'
        const reasoningEffort = parameters?.reasoningEffort ?? this.promptParameters?.reasoningEffort;
        if (reasoningEffort && thinkingLevel === undefined) {
            const effort = typeof reasoningEffort === 'string'
                ? reasoningEffort.toLowerCase()
                : String(reasoningEffort).toLowerCase();
            if (effort === 'high' || effort === 'medium') {
                // High or medium reasoning effort → high thinking level
                thinkingLevel = 'high';
            } else {
                // Low, minimal, or none → low thinking level (Gemini 3 doesn't support disabling thinking)
                thinkingLevel = 'low';
            }
        }

        // Also check pathway parameters
        if (thinkingLevel === undefined && cortexRequest?.pathway?.thinkingLevel !== undefined) {
            thinkingLevel = cortexRequest.pathway.thinkingLevel;
        } else if (thinkingLevel === undefined && cortexRequest?.pathway?.thinking_level !== undefined) {
            thinkingLevel = cortexRequest.pathway.thinking_level;
        } else if (thinkingLevel === undefined && cortexRequest?.pathway?.reasoningEffort !== undefined) {
            // Also check pathway for reasoningEffort
            const pathwayEffort = typeof cortexRequest.pathway.reasoningEffort === 'string'
                ? cortexRequest.pathway.reasoningEffort.toLowerCase()
                : String(cortexRequest.pathway.reasoningEffort).toLowerCase();
            if (pathwayEffort === 'high' || pathwayEffort === 'medium') {
                thinkingLevel = 'high';
            } else {
                thinkingLevel = 'low';
            }
        }

        if (includeThoughts === false && cortexRequest?.pathway?.includeThoughts !== undefined) {
            includeThoughts = cortexRequest.pathway.includeThoughts;
        } else if (includeThoughts === false && cortexRequest?.pathway?.include_thoughts !== undefined) {
            includeThoughts = cortexRequest.pathway.include_thoughts;
        }

        // Set up thinkingConfig in generationConfig if thinking is enabled
        if (thinkingLevel !== undefined || includeThoughts) {
            if (!baseParameters.generationConfig.thinkingConfig) {
                baseParameters.generationConfig.thinkingConfig = {};
            }
            // Gemini 3 uses thinkingLevel: 'low' or 'high'
            if (thinkingLevel !== undefined) {
                const level = typeof thinkingLevel === 'string'
                    ? thinkingLevel.toLowerCase()
                    : String(thinkingLevel).toLowerCase();
                // Validate and set thinkingLevel (only 'low' or 'high' are valid)
                if (level === 'low' || level === 'high') {
                    baseParameters.generationConfig.thinkingConfig.thinkingLevel = level;
                } else {
                    // Default to 'low' if invalid value
                    baseParameters.generationConfig.thinkingConfig.thinkingLevel = 'low';
                }
            }
            // includeThoughts: true to get thought summaries
            if (includeThoughts !== undefined) {
                baseParameters.generationConfig.thinkingConfig.includeThoughts = Boolean(includeThoughts);
            }
        }

        return baseParameters;
    }

    // Override parseResponse to handle thought summaries
    parseResponse(data) {
        // First, let the parent handle the response
        const baseResponse = super.parseResponse(data);

        // Check if we have thought summaries in the response
        if (data?.candidates?.[0]?.content?.parts) {
            const parts = data.candidates[0].content.parts;
            let thoughtSummaries = [];
            let hasThoughts = false;

            // Extract thought summaries from parts
            for (const part of parts) {
                if (part.thought && part.text) {
                    // This is a thought summary
                    thoughtSummaries.push(part.text);
                    hasThoughts = true;
                }
            }

            // If we have thought summaries, add them to the response
            if (hasThoughts) {
                // If baseResponse is already a CortexResponse, add thoughts to it
                if (baseResponse && typeof baseResponse === 'object' && baseResponse.constructor && baseResponse.constructor.name === 'CortexResponse') {
                    baseResponse.thoughts = thoughtSummaries;
                    return baseResponse;
                } else {
                    // Create new CortexResponse with thoughts
                    // Preserve the baseResponse text if it's a string
                    const outputText = typeof baseResponse === 'string' ? baseResponse : '';
                    return new CortexResponse({
                        output_text: outputText,
                        thoughts: thoughtSummaries,
                        finishReason: data?.candidates?.[0]?.finishReason === 'STOP' ? 'stop' : 'length',
                        usage: data?.usageMetadata || null,
                        metadata: { model: this.modelName }
                    });
                }
            }
        }

        return baseResponse;
    }

    // Override processStreamEvent to handle thought summaries in streaming
    processStreamEvent(event, requestProgress) {
        const baseProgress = super.processStreamEvent(event, requestProgress);
        const eventData = JSON.parse(event.data);

        // Initialize thought summaries array if needed
        if (!requestProgress.thoughts) {
            requestProgress.thoughts = [];
        }

        // Handle thought summaries in streaming
        if (eventData.candidates?.[0]?.content?.parts) {
            const parts = eventData.candidates[0].content.parts;
            for (const part of parts) {
                if (part.thought && part.text) {
                    // This is a thought summary chunk
                    // Accumulate thought summaries
                    if (!requestProgress.thoughts.includes(part.text)) {
                        requestProgress.thoughts.push(part.text);
                    }
                    // Optionally, you could emit thought chunks separately
                    // For now, we'll accumulate them and they'll be available in the final response
                }
            }
        }

        return baseProgress;
    }

    // Override logRequestData to include thought information
    logRequestData(data, responseData, prompt) {
        // Check if responseData is a CortexResponse object with thoughts
        if (responseData && typeof responseData === 'object' && responseData.constructor && responseData.constructor.name === 'CortexResponse') {
            const { length, units } = this.getLength(responseData.output_text || '');
            logger.info(`[response received containing ${length} ${units}]`);
            if (responseData.thoughts && responseData.thoughts.length > 0) {
                logger.info(`[response contains ${responseData.thoughts.length} thought summary(ies)]`);
                responseData.thoughts.forEach((thought, index) => {
                    logger.verbose(`[thought ${index + 1}]: ${this.shortenContent(thought)}`);
                });
            }
            if (responseData.artifacts && responseData.artifacts.length > 0) {
                logger.info(`[response contains ${responseData.artifacts.length} image artifact(s)]`);
            }
            logger.verbose(`${this.shortenContent(responseData.output_text || '')}`);
            return;
        }
        // Fall back to parent implementation for non-CortexResponse objects
        super.logRequestData(data, responseData, prompt);
    }
}

export default Gemini3ReasoningVisionPlugin;
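A minimal usage sketch follows. The pathway and model objects passed to the constructor are hypothetical stand-ins (Cortex normally constructs plugins internally from its model configuration, so their real shapes are not shown in this file), but buildFunctionCallPart itself only reads the fields used below, including the documented dummy-signature fallback when no thoughtSignature is present:

// Hedged usage sketch - the pathway/model shapes here are assumptions for
// illustration, not part of this file; Cortex wires plugins up from its
// own model configuration at startup.
import Gemini3ReasoningVisionPlugin from './gemini3ReasoningVisionPlugin.js';

const plugin = new Gemini3ReasoningVisionPlugin(
    { name: 'example-pathway' },   // hypothetical pathway object
    { name: 'gemini-3-pro' }       // hypothetical model object
);

// A tool call whose thoughtSignature was lost (e.g., restored from history)
// falls back to the documented dummy value instead of triggering a 400 error.
const part = plugin.buildFunctionCallPart(
    { function: { name: 'get_weather' } },  // no thoughtSignature present
    { city: 'Doha' }
);
console.log(part.thoughtSignature);  // "skip_thought_signature_validator"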