UNPKG

@llumiverse/drivers

Version:

LLM driver implementations. Currently supported are: openai, huggingface, bedrock, replicate.

858 lines 38.5 kB
import {
    FinishReason,
    FunctionCallingConfigMode,
    HarmBlockThreshold,
    HarmCategory,
    Modality,
    ProminentPeople,
    ThinkingLevel,
} from "@google/genai";
import {
    getConversationMeta,
    getGeminiModelVersion,
    incrementConversationTurn,
    isGeminiModelVersionGte,
    LlumiverseError,
    ModelType,
    PromptRole,
    readStreamAsBase64,
    stripBase64ImagesFromConversation,
    stripHeartbeatsFromConversation,
    truncateLargeTextInConversation,
    unwrapConversationArray,
} from "@llumiverse/core";
import { asyncMap } from "@llumiverse/core/async";

/**
 * Tell whether native structured (JSON) output should be considered for a request.
 *
 * @param options - Execution options; reads `result_schema` and `model`.
 * @returns true only when a result schema is set and the model id does not contain "ultra".
 */
function supportsStructuredOutput(options) {
    // Gemini 1.0 Ultra does not support JSON output, 1.0 Pro does.
    return !!options.result_schema && !options.model.includes("ultra");
}

/**
 * Safety settings sent with every Gemini request: each listed harm category
 * uses the BLOCK_ONLY_HIGH threshold.
 */
const geminiSafetySettings = [
    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    HarmCategory.HARM_CATEGORY_HARASSMENT,
    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
    HarmCategory.HARM_CATEGORY_HATE_SPEECH,
    HarmCategory.HARM_CATEGORY_UNSPECIFIED,
    HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY,
].map((category) => ({
    category,
    threshold: HarmBlockThreshold.BLOCK_ONLY_HIGH,
}));

// We do the mapping here rather than in common to avoid bringing the SDK into the common package.
/**
 * Translate the `prominent_people` option string into the SDK enum value.
 *
 * @param prominentPeople - "ALLOW_PROMINENT_PEOPLE", "BLOCK_PROMINENT_PEOPLE" or
 *   "PROMINENT_PEOPLE_UNSPECIFIED"; any other value is treated as not set.
 * @returns The matching `ProminentPeople` member, or undefined for unrecognized input.
 */
function getProminentPeopleOption(prominentPeople) {
    switch (prominentPeople) {
        case "ALLOW_PROMINENT_PEOPLE":
            return ProminentPeople.ALLOW_PROMINENT_PEOPLE;
        case "BLOCK_PROMINENT_PEOPLE":
            return ProminentPeople.BLOCK_PROMINENT_PEOPLE;
        case "PROMINENT_PEOPLE_UNSPECIFIED":
            return ProminentPeople.PROMINENT_PEOPLE_UNSPECIFIED;
        default:
            return undefined;
    }
}

/**
 * Build the `{ model, contents, config }` request object passed to the GenAI client.
 *
 * Two configurations are prepared: an image-oriented one (used when the
 * lower-cased model id contains "image") and the regular text/tool one.
 *
 * @param options - Execution options (model, model_options, tools, result_schema, labels).
 * @param prompt - Prompt with `contents` and optional `system` instruction.
 * @returns The request payload. `prompt.contents` is never mutated.
 */
function getGeminiPayload(options, prompt) {
    const model_options = options.model_options;
    const tools = getToolDefinitions(options.tools);
    // When no tools are provided but conversation contains functionCall/functionResponse parts
    // (e.g. checkpoint summary calls), convert them to text to avoid API errors.
    // Use a local variable to avoid mutating the caller's conversation object.
    let payloadContents = prompt.contents;
    if (!tools && payloadContents) {
        const hasToolParts = payloadContents.some(c => c.parts?.some(p => p.functionCall || p.functionResponse));
        if (hasToolParts) {
            payloadContents = convertGeminiFunctionPartsToText(payloadContents);
        }
    }
    // Structured output is only requested when no tools are declared.
    const useStructuredOutput = supportsStructuredOutput(options) && !tools;
    const configNanoBanana = {
        systemInstruction: prompt.system,
        safetySettings: geminiSafetySettings,
        // This is an error if only Text, and Only Image just gets blank responses.
        responseModalities: [Modality.TEXT, Modality.IMAGE],
        candidateCount: 1,
        //Model options
        temperature: model_options?.temperature,
        topP: model_options?.top_p,
        maxOutputTokens: model_options?.max_tokens,
        stopSequences: model_options?.stop_sequence,
        thinkingConfig: geminiThinkingConfig(options),
        labels: options.labels,
        imageConfig: {
            imageSize: model_options?.image_size,
            aspectRatio: model_options?.image_aspect_ratio,
            personGeneration: model_options?.person_generation,
            prominentPeople: getProminentPeopleOption(model_options?.prominent_people),
            outputMimeType: model_options?.output_mime_type,
            outputCompressionQuality: model_options?.output_compression_quality,
        }
    };
    const config = {
        systemInstruction: prompt.system,
        safetySettings: geminiSafetySettings,
        tools: tools ? [tools] : undefined,
        toolConfig: tools ? {
            functionCallingConfig: {
                mode: FunctionCallingConfigMode.AUTO,
            }
        } : undefined,
        candidateCount: 1,
        //JSON/Structured output
        responseMimeType: useStructuredOutput ? "application/json" : undefined,
        responseJsonSchema: useStructuredOutput ? options.result_schema : undefined,
        //Model options
        temperature: model_options?.temperature,
        topP: model_options?.top_p,
        topK: model_options?.top_k,
        maxOutputTokens: model_options?.max_tokens,
        stopSequences: model_options?.stop_sequence,
        presencePenalty: model_options?.presence_penalty,
        frequencyPenalty: model_options?.frequency_penalty,
        seed: model_options?.seed,
        thinkingConfig: geminiThinkingConfig(options),
        labels: options.labels,
    };
    return {
        model: options.model,
        contents: payloadContents,
        config: options.model.toLowerCase().includes("image") ? configNanoBanana : config,
    };
}

/**
 * Collect all parts (text and images) from content in order.
 * This preserves the original ordering of text and image parts.
 *
 * @param content - A Gemini content object with optional `parts`.
 * @returns Array of `{ type: "text" | "image", value }`; images are returned as data URLs
 *   (mime type defaults to "image/png" when the part does not carry one).
 */
function extractCompletionResults(content) {
    const results = [];
    const parts = content.parts;
    if (parts) {
        for (const part of parts) {
            if (part.text) {
                results.push({ type: "text", value: part.text });
            }
            else if (part.inlineData) {
                const base64ImageBytes = part.inlineData.data ?? "";
                const mimeType = part.inlineData.mimeType ?? "image/png";
                const imageUrl = `data:${mimeType};base64,${base64ImageBytes}`;
                results.push({ type: "image", value: imageUrl });
            }
        }
    }
    return results;
}

/**
 * Extract every function call found in a content object as a tool-use record.
 *
 * @param content - A Gemini content object with optional `parts`.
 * @returns Array of `{ id, tool_name, tool_input, thought_signature? }`, or undefined
 *   when the content contains no function call. The function name is used as the id.
 */
function collectToolUseParts(content) {
    const out = [];
    const parts = content.parts ?? [];
    for (const part of parts) {
        if (part.functionCall) {
            const toolUse = {
                id: part.functionCall.name ?? '',
                tool_name: part.functionCall.name ?? '',
                tool_input: part.functionCall.args,
            };
            // Capture thought_signature for Gemini thinking models (2.5+/3.0+)
            // This must be passed back with the function response
            if (part.thoughtSignature) {
                toolUse.thought_signature = part.thoughtSignature;
            }
            out.push(toolUse);
        }
    }
    return out.length > 0 ? out : undefined;
}

/**
 * Merge adjacent content entries that share the same role by concatenating their parts.
 *
 * @param contents - Array of content objects (`{ role, parts }`); may be null/empty.
 * @returns `[]` for null/empty input, the original array (same reference) when no two
 *   neighbours share a role, otherwise a new array. Input entries are not mutated.
 */
export function mergeConsecutiveRole(contents) {
    if (!contents || contents.length === 0)
        return [];
    const needsMerging = contents.some((content, i) => i < contents.length - 1 && content.role === contents[i + 1].role);
    // If no merging needed, return original array
    if (!needsMerging) {
        return contents;
    }
    const result = [];
    let currentContent = { ...contents[0], parts: [...(contents[0].parts || [])] };
    for (let i = 1; i < contents.length; i++) {
        if (currentContent.role === contents[i].role) {
            // Same role - concatenate parts (without merging individual parts)
            currentContent.parts = (currentContent.parts || []).concat(...(contents[i].parts || []));
        }
        else {
            // Different role - push current and start new
            result.push(currentContent);
            currentContent = { ...contents[i], parts: [...(contents[i].parts || [])] };
        }
    }
    result.push(currentContent);
    return result;
}

/** Finish reasons that are accepted as a normal end of generation. */
const supportedFinishReasons = [
    FinishReason.MAX_TOKENS,
    FinishReason.STOP,
    FinishReason.FINISH_REASON_UNSPECIFIED,
];

// Finish reasons that indicate tool call issues but should be recovered gracefully
// instead of throwing an error. The tool_use is still extracted and returned
// so the workflow can generate a proper toolError response.
const recoverableToolCallReasons = [
    'UNEXPECTED_TOOL_CALL', // Model called an undeclared tool
];

/**
 * Split a possibly path-qualified model id into the region and the bare model name.
 *
 * @param model - Model id, e.g. "locations/<region>/.../<model>" or just "<model>".
 * @returns `{ region, modelName }`. `region` is the second path segment when the first
 *   one is "locations", otherwise undefined; it is forced to "global" for
 *   "gemini-2.5-flash-image" models.
 */
function resolveModelTarget(model) {
    const segments = model.split("/");
    const modelName = segments[segments.length - 1];
    let region = undefined;
    if (segments[0] === "locations" && segments.length >= 2) {
        region = segments[1];
    }
    // TODO: Remove hack, use global endpoint manually if needed.
    if (modelName.includes("gemini-2.5-flash-image")) {
        region = "global"; // Gemini Flash Image only available in global region, this is for nano-banana model
    }
    return { region, modelName };
}

/**
 * Map a Gemini finish reason to the completion finish reason.
 * MAX_TOKENS becomes "length", STOP becomes "stop", anything else is passed through.
 */
function toCompletionFinishReason(finishReason) {
    switch (finishReason) {
        case FinishReason.MAX_TOKENS:
            return "length";
        case FinishReason.STOP:
            return "stop";
        default:
            return finishReason;
    }
}

/**
 * Validate a candidate's finish reason.
 *
 * @param candidate - A response candidate.
 * @returns true when the finish reason is one of the recoverable tool-call reasons.
 * @throws Error when a finish reason is present and is neither supported nor recoverable.
 */
function checkFinishReason(candidate) {
    const isRecoverableToolCall = recoverableToolCallReasons.includes(candidate.finishReason);
    if (candidate.finishReason && !supportedFinishReasons.includes(candidate.finishReason) && !isRecoverableToolCall) {
        throw new Error(`Unsupported finish reason: ${candidate.finishReason}, `
            + `finish message: ${candidate.finishMessage}, `
            + `content: ${JSON.stringify(candidate.content, null, 2)}, safety: ${JSON.stringify(candidate.safetyRatings, null, 2)}`);
    }
    return isRecoverableToolCall;
}

/**
 * Resolve the thinking token budget for a request.
 * Precedence: explicit `thinking_budget_tokens`, then `effort`, then a default for
 * version-2.5 models (128 for "pro", 0 otherwise). Returns undefined in all other cases.
 */
function geminiThinkingBudget(option) {
    const model_options = option.model_options;
    // If thinking_budget_tokens is explicitly set in model options, use it directly
    if (model_options?.thinking_budget_tokens !== undefined) {
        return model_options.thinking_budget_tokens;
    }
    if (model_options?.effort) {
        return geminiBudgetForEffort(option.model, model_options.effort);
    }
    // Set minimum thinking level by default.
    // Docs: https://ai.google.dev/gemini-api/docs/thinking#set-budget
    if (getGeminiModelVersion(option.model) === '2.5') {
        if (option.model.includes("pro")) {
            return 128;
        }
        return 0;
    }
    return undefined;
}

/**
 * Map an effort setting ("low" | "medium" | "high") to a `ThinkingLevel`.
 * The two image model families handled first have their own fixed mapping.
 * Returns undefined for an unrecognized effort on other models.
 */
function geminiThinkingLevelForEffort(model, effort) {
    if (model.includes("gemini-3-pro-image")) {
        return ThinkingLevel.HIGH;
    }
    if (model.includes("gemini-3.1-flash-image")) {
        return effort === "low" ? ThinkingLevel.MINIMAL : ThinkingLevel.HIGH;
    }
    switch (effort) {
        case "low":
            return ThinkingLevel.LOW;
        case "medium":
            return ThinkingLevel.MEDIUM;
        case "high":
            return ThinkingLevel.HIGH;
        default:
            return undefined;
    }
}

/**
 * Map an effort setting to a thinking token budget, depending on the model family
 * (detected from "pro", "flash" and "flash-lite" substrings of the model id).
 * Any effort other than "low" or "medium" is treated as the highest tier.
 */
function geminiBudgetForEffort(model, effort) {
    const isFlashLite = model.includes("flash-lite");
    const isFlash = model.includes("flash") && !isFlashLite;
    const isPro = model.includes("pro");
    if (effort === "low") {
        if (isPro)
            return 128;
        if (isFlashLite)
            return 512;
        if (isFlash)
            return 1;
        return 1024;
    }
    if (effort === "medium") {
        return 8192;
    }
    if (isPro)
        return 32768;
    if (isFlash || isFlashLite)
        return 24576;
    return 8192;
}

/**
 * Build the `thinkingConfig` for a request.
 * Precedence: explicit budget/level options, then `effort` (level for 3.0+ models,
 * budget otherwise), then per-version defaults. Returns undefined for models older than 2.5.
 */
function geminiThinkingConfig(option) {
    const model_options = option.model_options;
    // If thinking options are explicitly set in model options, use them directly
    const include_thoughts = model_options?.include_thoughts ?? false;
    if (model_options?.thinking_budget_tokens !== undefined || model_options?.thinking_level) {
        return {
            includeThoughts: include_thoughts,
            thinkingBudget: model_options.thinking_budget_tokens,
            thinkingLevel: model_options.thinking_level,
        };
    }
    if (model_options?.effort) {
        if (isGeminiModelVersionGte(option.model, '3.0')) {
            return {
                includeThoughts: include_thoughts,
                thinkingLevel: geminiThinkingLevelForEffort(option.model, model_options.effort),
            };
        }
        return {
            includeThoughts: include_thoughts,
            thinkingBudget: geminiBudgetForEffort(option.model, model_options.effort),
        };
    }
    // Set a low thinking level by default.
    // Docs: https://ai.google.dev/gemini-api/docs/thinking#set-budget
    // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/thinking
    if (isGeminiModelVersionGte(option.model, '3.0')) {
        return {
            includeThoughts: include_thoughts,
            thinkingLevel: option.model.includes("gemini-3-pro-image") ? ThinkingLevel.HIGH : ThinkingLevel.LOW
        };
    }
    if (isGeminiModelVersionGte(option.model, '2.5')) {
        const thinking_budget_tokens = geminiThinkingBudget(option) ?? 0;
        return {
            includeThoughts: include_thoughts,
            thinkingBudget: thinking_budget_tokens
        };
    }
    return undefined;
}

/**
 * Model definition for Gemini models: builds prompts, runs (streaming and
 * non-streaming) completions and normalizes usage and errors.
 */
export class GeminiModelDefinition {
    model;

    /**
     * @param modelId - Used as both the id and the name of the model descriptor.
     */
    constructor(modelId) {
        this.model = {
            id: modelId,
            name: modelId,
            provider: 'vertexai',
            type: ModelType.Text,
            can_stream: true
        };
    }

    /**
     * Convert prompt segments into Gemini `contents` plus an optional `system` instruction.
     *
     * System segments are gathered into one system content, tool segments become
     * functionResponse parts, safety segments are appended after all other contents,
     * and a result schema (if any) adds an instruction to the system content.
     *
     * @param _driver - Unused.
     * @param segments - Prompt segments (role, content, files, tool_use_id, thought_signature).
     * @param options - Execution options; reads model, result_schema and tools.
     * @returns `{ contents, system }` where `system` is undefined when there is no system text.
     * @throws Error when a system segment carries files or a tool segment has no tool_use_id.
     */
    async createPrompt(_driver, segments, options) {
        const splits = options.model.split("/");
        const modelName = splits[splits.length - 1];
        options = { ...options, model: modelName };
        const schema = options.result_schema;
        let contents = [];
        let system = { role: "user", parts: [] }; // Single content block for system messages
        const safety = [];
        for (const msg of segments) {
            // Role specific handling
            if (msg.role === PromptRole.system) {
                // Text only for system messages
                if (msg.files && msg.files.length > 0) {
                    throw new Error("Gemini does not support files/images etc. in system messages. Only text content is allowed.");
                }
                if (msg.content) {
                    system.parts?.push({
                        text: msg.content
                    });
                }
            }
            else if (msg.role === PromptRole.tool) {
                if (!msg.tool_use_id) {
                    throw new Error("Tool response missing tool_use_id");
                }
                // Build functionResponse part with optional thought_signature for Gemini thinking models
                const functionResponsePart = {
                    functionResponse: {
                        name: msg.tool_use_id,
                        response: formatFunctionResponse(msg.content || ''),
                    },
                    // Include thought_signature if provided (required for Gemini 2.5+/3.0+ thinking models)
                    thoughtSignature: msg.thought_signature,
                };
                contents.push({
                    role: 'user',
                    parts: [functionResponsePart]
                });
            }
            else { // PromptRole.user, PromptRole.assistant, PromptRole.safety
                const parts = [];
                // Text content handling
                if (msg.content) {
                    parts.push({
                        text: msg.content,
                    });
                }
                // File content handling
                if (msg.files) {
                    for (const f of msg.files) {
                        const fileUrl = await f.getURL();
                        const isGsUrl = fileUrl.startsWith('gs://') || fileUrl.startsWith('https://storage.googleapis.com/');
                        if (isGsUrl) {
                            parts.push({
                                fileData: {
                                    fileUri: fileUrl,
                                    mimeType: f.mime_type
                                }
                            });
                        }
                        else {
                            // Inline data handling
                            const stream = await f.getStream();
                            const data = await readStreamAsBase64(stream);
                            parts.push({
                                inlineData: {
                                    data,
                                    mimeType: f.mime_type
                                }
                            });
                        }
                    }
                }
                if (parts.length > 0) {
                    if (msg.role === PromptRole.safety) {
                        safety.push({
                            role: 'user',
                            parts,
                        });
                    }
                    else {
                        contents.push({
                            role: msg.role === PromptRole.assistant ? 'model' : 'user',
                            parts,
                        });
                    }
                }
            }
        }
        // Adding JSON Schema to system message
        if (schema) {
            // An empty tools array counts as "no tools", matching how the request payload
            // decides whether structured output is enabled.
            const hasTools = !!options.tools && options.tools.length > 0;
            if (supportsStructuredOutput(options) && !hasTools) {
                // Gemini structured output is unnecessarily sparse. Adding encouragement to fill the fields.
                // Putting JSON in prompt is not recommended by Google, when using structured output.
                system.parts?.push({ text: "Fill all appropriate fields in the JSON output." });
            }
            else {
                // Fallback to putting the schema in the system instructions, if not using structured output.
                if (hasTools) {
                    system.parts?.push({
                        text: "When not calling tools, the output must be a JSON object using the following JSON Schema:\n" + JSON.stringify(schema)
                    });
                }
                else {
                    system.parts?.push({ text: "The output must be a JSON object using the following JSON Schema:\n" + JSON.stringify(schema) });
                }
            }
        }
        // If no system messages, set system to undefined.
        if (!system.parts || system.parts.length === 0) {
            system = undefined;
        }
        // Add safety messages to the end of contents. They are in effect user messages that come at the end.
        if (safety.length > 0) {
            contents = contents.concat(safety);
        }
        // Merge consecutive messages with the same role. Note: this may not be necessary, works without it, keeping to match previous behavior.
        contents = mergeConsecutiveRole(contents);
        return { contents, system };
    }

    /**
     * Convert Gemini usage metadata into a token usage record.
     *
     * @param driver - Only `driver.logger.warn` is used, to report accounting mismatches.
     * @param usageMetadata - Usage metadata from a response (may be undefined).
     * @returns `{}` when metadata or its total count is missing/zero, otherwise
     *   `{ total, prompt, prompt_cached, prompt_new, result }`. `result` sums candidate,
     *   thought and tool-use-prompt tokens and is undefined when that sum is 0.
     */
    usageMetadataToTokenUsage(driver, usageMetadata) {
        if (!usageMetadata || !usageMetadata.totalTokenCount) {
            return {};
        }
        const tokenUsage = {
            total: usageMetadata.totalTokenCount,
            prompt: usageMetadata.promptTokenCount,
            prompt_cached: usageMetadata.cachedContentTokenCount ?? undefined,
            prompt_new: (usageMetadata.promptTokenCount ?? 0) - (usageMetadata.cachedContentTokenCount ?? 0),
        };
        //Output/Response side
        tokenUsage.result = (usageMetadata.candidatesTokenCount ?? 0)
            + (usageMetadata.thoughtsTokenCount ?? 0)
            + (usageMetadata.toolUsePromptTokenCount ?? 0);
        if ((tokenUsage.total ?? 0) !== (tokenUsage.prompt ?? 0) + tokenUsage.result) {
            // Token-accounting mismatch: warn-level diagnostic (the call still
            // returns the best-effort tokenUsage). Use the driver's structured
            // logger so we don't promote stderr writes to ERROR in serverless
            // log aggregators — see the recoverable-tool-call sites below.
            driver.logger.warn({
                total: tokenUsage.total,
                prompt: tokenUsage.prompt,
                result: tokenUsage.result
            }, "[VertexAI] Gemini token usage mismatch: total does not equal prompt + result");
        }
        if (!tokenUsage.result) {
            tokenUsage.result = undefined; // If no result, mark as undefined
        }
        return tokenUsage;
    }

    /**
     * Run a non-streaming completion.
     *
     * Note: `prompt.system` and `prompt.contents` are overwritten on the passed prompt
     * (restored system instruction and full conversation respectively).
     *
     * @param driver - Provides `getGoogleGenAIClient(region, flex)` and `logger`.
     * @param prompt - Prompt produced by `createPrompt`.
     * @param options - Execution options (model, conversation, model_options, strip settings...).
     * @returns `{ result, token_usage, finish_reason, original_response, conversation, tool_use }`.
     * @throws Error when the first candidate ends with an unsupported finish reason.
     */
    async requestTextCompletion(driver, prompt, options) {
        const { region, modelName } = resolveModelTarget(options.model);
        options = { ...options, model: modelName };
        // Restore system instruction from stored conversation on resume.
        // The stored _llumiverse_system contains the complete system (interaction prompt + schema)
        // from the initial call. Always prefer it over the prompt's system, which on resume only
        // contains the schema instruction (no interaction system segments are present on resume).
        const existingSystem = extractSystemFromConversation(options.conversation);
        if (existingSystem) {
            prompt.system = existingSystem;
        }
        let conversation = updateConversation(options.conversation, prompt.contents);
        prompt.contents = conversation;
        const model_options = options.model_options;
        const client = driver.getGoogleGenAIClient(region, model_options?.flex ?? false);
        const payload = getGeminiPayload(options, prompt);
        const response = await client.models.generateContent(payload);
        const token_usage = this.usageMetadataToTokenUsage(driver, response.usageMetadata);
        let tool_use;
        let finish_reason, result;
        const candidate = response.candidates && response.candidates[0];
        if (candidate) {
            finish_reason = toCompletionFinishReason(candidate.finishReason);
            const content = candidate.content;
            // Check for unsupported finish reasons, but allow recoverable tool call issues
            const isRecoverableToolCall = checkFinishReason(candidate);
            if (content) {
                tool_use = collectToolUseParts(content);
                // For recoverable tool call issues, log warning but continue processing
                // The workflow will handle the invalid tool call gracefully.
                // Route through the driver's structured logger instead of `console.warn`
                // so downstream runtimes (e.g. Cloud Run) don't promote stderr writes
                // to ERROR severity for what is, by definition, a recoverable event.
                if (isRecoverableToolCall && tool_use && tool_use.length > 0) {
                    driver.logger.warn(`[Gemini] Recoverable tool call issue (${candidate.finishReason}): ` +
                        `Model tried to call undeclared tool(s): ${tool_use.map(t => t.tool_name).join(', ')}`);
                }
                result = extractCompletionResults(content);
                conversation = updateConversation(conversation, [content]);
            }
        }
        if (tool_use) {
            finish_reason = "tool_use";
        }
        // Increment turn counter for deferred stripping
        conversation = incrementConversationTurn(conversation);
        // Strip large base64 image data based on options.stripImagesAfterTurns
        const currentTurn = getConversationMeta(conversation).turnNumber;
        const stripOptions = {
            keepForTurns: options.stripImagesAfterTurns ?? Infinity,
            currentTurn,
            textMaxTokens: options.stripTextMaxTokens
        };
        let processedConversation = stripBase64ImagesFromConversation(conversation, stripOptions);
        // Truncate large text content if configured
        processedConversation = truncateLargeTextInConversation(processedConversation, stripOptions);
        // Strip old heartbeat status messages
        processedConversation = stripHeartbeatsFromConversation(processedConversation, {
            keepForTurns: options.stripHeartbeatsAfterTurns ?? 1,
            currentTurn,
        });
        // Preserve system instruction in conversation for multi-turn support
        const finalConversation = storeSystemInConversation(processedConversation, prompt.system);
        return {
            result: result && result.length > 0 ? result : [{ type: "text", value: '' }],
            token_usage: token_usage,
            finish_reason: finish_reason,
            original_response: options.include_original_response ? response : undefined,
            conversation: finalConversation,
            tool_use
        };
    }

    /**
     * Run a streaming completion.
     *
     * Note: `prompt.system` and `prompt.contents` are overwritten on the passed prompt,
     * as in the non-streaming path.
     *
     * @param driver - Provides `getGoogleGenAIClient(region, flex)` and `logger`.
     * @param prompt - Prompt produced by `createPrompt`.
     * @param options - Execution options (model, conversation, model_options...).
     * @returns An async stream of chunks `{ result, token_usage, finish_reason, tool_use? }`.
     *   A chunk without a 'model'-role candidate reports the prompt block reason, if any.
     * @throws Error (while consuming the stream) on an unsupported finish reason.
     */
    async requestTextCompletionStream(driver, prompt, options) {
        const { region, modelName } = resolveModelTarget(options.model);
        options = { ...options, model: modelName };
        // Restore system instruction from stored conversation on resume.
        // The stored _llumiverse_system contains the complete system (interaction prompt + schema)
        // from the initial call. Always prefer it over the prompt's system, which on resume only
        // contains the schema instruction (no interaction system segments are present on resume).
        const existingSystem = extractSystemFromConversation(options.conversation);
        if (existingSystem) {
            prompt.system = existingSystem;
        }
        // Include conversation history in prompt contents (same as non-streaming)
        const conversation = updateConversation(options.conversation, prompt.contents);
        prompt.contents = conversation;
        const model_options = options.model_options;
        const client = driver.getGoogleGenAIClient(region, model_options?.flex ?? false);
        const payload = getGeminiPayload(options, prompt);
        const response = await client.models.generateContentStream(payload);
        const stream = asyncMap(response, async (item) => {
            const token_usage = this.usageMetadataToTokenUsage(driver, item.usageMetadata);
            if (item.candidates && item.candidates.length > 0) {
                for (const candidate of item.candidates) {
                    let tool_use;
                    let finish_reason = toCompletionFinishReason(candidate.finishReason);
                    // Check for unsupported finish reasons, but allow recoverable tool call issues
                    const isRecoverableToolCall = checkFinishReason(candidate);
                    if (candidate.content?.role === 'model') {
                        // Collect all parts in order (text and images)
                        const combinedResults = extractCompletionResults(candidate.content);
                        tool_use = collectToolUseParts(candidate.content);
                        if (tool_use) {
                            finish_reason = "tool_use";
                            // Log warning for recoverable tool call issues — see the
                            // matching site in `requestTextCompletion` above for why
                            // we route through the driver's logger instead of
                            // `console.warn`.
                            if (isRecoverableToolCall) {
                                driver.logger.warn(`[Gemini] Recoverable tool call issue (${candidate.finishReason}): ` +
                                    `Model tried to call undeclared tool(s): ${tool_use.map(t => t.tool_name).join(', ')}`);
                            }
                        }
                        return {
                            result: combinedResults.length > 0 ? combinedResults : [],
                            token_usage: token_usage,
                            finish_reason: finish_reason,
                            tool_use,
                        };
                    }
                }
            }
            //No normal output, returning block reason if it exists.
            return {
                result: item.promptFeedback?.blockReasonMessage ? [{ type: "text", value: item.promptFeedback.blockReasonMessage }] : [],
                finish_reason: item.promptFeedback?.blockReason ?? "",
                token_usage: token_usage,
            };
        });
        return stream;
    }

    /**
     * Format Google API errors into LlumiverseError with proper status codes and retryability.
     *
     * Google API errors follow AIP-193 standard:
     * - ApiError.status: HTTP status code
     * - ApiError.message: Error message
     *
     * Common error codes:
     * - 400 (INVALID_ARGUMENT): Invalid request parameters
     * - 401 (UNAUTHENTICATED): Authentication required
     * - 403 (PERMISSION_DENIED): Insufficient permissions
     * - 404 (NOT_FOUND): Resource not found
     * - 429 (RESOURCE_EXHAUSTED): Rate limit/quota exceeded
     * - 500 (INTERNAL): Internal server error
     * - 503 (UNAVAILABLE): Service temporarily unavailable
     * - 504 (DEADLINE_EXCEEDED): Request timeout
     *
     * Errors that do not look like Google API errors are rethrown unchanged.
     *
     * @see https://google.aip.dev/193
     * @see https://docs.cloud.google.com/vertex-ai/generative-ai/docs/model-reference/api-errors
     */
    formatLlumiverseError(_driver, error, context) {
        // Check if it's a Google API error with status code
        const isApiError = this.isGoogleApiError(error);
        if (!isApiError) {
            // Not a Google API error, use default handling
            // This will be called by the driver's default formatLlumiverseError
            throw error;
        }
        const apiError = error;
        const httpStatusCode = apiError.status;
        // Extract error message
        const message = apiError.message || String(error);
        // Build user-facing message with status code
        let userMessage = message;
        // Include status code in message (for end-user visibility)
        if (httpStatusCode) {
            userMessage = `[${httpStatusCode}] ${userMessage}`;
        }
        // Determine retryability based on Google error codes
        const retryable = this.isGeminiErrorRetryable(httpStatusCode);
        // Extract error name/type from message if present
        const errorName = this.extractErrorName(message);
        return new LlumiverseError(`[${context.provider}] ${userMessage}`, retryable, context, error, httpStatusCode, errorName);
    }

    /**
     * Type guard to check if error is a Google API error:
     * a non-null object with a numeric `status` and a `message` property.
     */
    isGoogleApiError(error) {
        return (error !== null &&
            typeof error === 'object' &&
            'status' in error &&
            typeof error.status === 'number' &&
            'message' in error);
    }

    /**
     * Determine if a Google API error is retryable based on HTTP status code.
     *
     * Retryable errors (per Google AIP-194):
     * - 408 (REQUEST_TIMEOUT): Request timeout
     * - 429 (RESOURCE_EXHAUSTED): Rate limit exceeded, quota exhausted
     * - Any 5xx (500 INTERNAL, 502 BAD_GATEWAY, 503 UNAVAILABLE, 504 DEADLINE_EXCEEDED, ...)
     *
     * Non-retryable errors:
     * - All other 4xx client errors (400, 401, 403, 404, 409, ...)
     *
     * @param httpStatusCode - The HTTP status code from the API error
     * @returns True if retryable, false if not retryable, undefined if unknown
     */
    isGeminiErrorRetryable(httpStatusCode) {
        // Retryable client-side codes: request timeout and rate limit/quota
        if (httpStatusCode === 408 || httpStatusCode === 429)
            return true;
        // All 5xx server errors are retryable
        if (httpStatusCode >= 500 && httpStatusCode < 600)
            return true;
        // Non-retryable 4xx client errors
        if (httpStatusCode >= 400 && httpStatusCode < 500)
            return false;
        // Unknown status codes - let consumer decide retry strategy
        return undefined;
    }

    /**
     * Extract error type name from error message.
     * Google errors often include the error type in the message.
     * Examples: "INVALID_ARGUMENT", "RESOURCE_EXHAUSTED", "PERMISSION_DENIED"
     *
     * @returns The first captured name, or undefined when no pattern matches.
     */
    extractErrorName(message) {
        // Common Google error patterns
        const patterns = [
            /^([A-Z_]+):/, // "ERROR_NAME: message"
            /\[([A-Z_]+)\]/, // "[ERROR_NAME] message"
            /^(\w+Error):/, // "ErrorTypeError: message"
        ];
        for (const pattern of patterns) {
            const match = message.match(pattern);
            if (match) {
                return match[1];
            }
        }
        return undefined;
    }
}

/**
 * Converts functionCall and functionResponse parts to text parts in Gemini Content[].
 * Preserves tool call information while removing structured parts that require
 * tools/toolConfig to be defined in the API request.
 *
 * Serialized arguments/responses longer than 500 characters are cut and suffixed
 * with "...". Contents without function parts are returned as-is (same reference).
 */
export function convertGeminiFunctionPartsToText(contents) {
    return contents.map(content => {
        if (!content.parts)
            return content;
        const hasFunctionParts = content.parts.some(p => p.functionCall || p.functionResponse);
        if (!hasFunctionParts)
            return content;
        const newParts = content.parts.map(part => {
            if (part.functionCall) {
                const argsStr = part.functionCall.args ? JSON.stringify(part.functionCall.args) : '';
                const truncated = argsStr.length > 500 ? argsStr.substring(0, 500) + '...' : argsStr;
                return { text: `[Tool call: ${part.functionCall.name}(${truncated})]` };
            }
            if (part.functionResponse) {
                const respStr = part.functionResponse.response ? JSON.stringify(part.functionResponse.response) : 'No response';
                const truncated = respStr.length > 500 ? respStr.substring(0, 500) + '...' : respStr;
                return { text: `[Tool result for ${part.functionResponse.name}: ${truncated}]` };
            }
            return part;
        });
        return { ...content, parts: newParts };
    });
}

/**
 * Wrap tool definitions into a single Gemini tool.
 *
 * @returns `{ functionDeclarations }`, or undefined when `tools` is missing or empty.
 */
function getToolDefinitions(tools) {
    if (!tools || tools.length === 0) {
        return undefined;
    }
    // VertexAI Gemini only supports one tool at a time.
    // For multiple tools, we have multiple functions in one tool.
    return {
        functionDeclarations: tools.map(getToolFunction),
    };
}

/**
 * Convert one tool definition into a Gemini function declaration.
 */
function getToolFunction(tool) {
    return {
        name: tool.name,
        description: tool.description,
        // Pass the input_schema directly as a JSON Schema object.
        // parametersJsonSchema accepts standard JSON Schema and is mutually exclusive
        // with the legacy parameters field (which required a proprietary Gemini Schema type).
        parametersJsonSchema: tool.input_schema,
    };
}

/**
 * Append new contents to a conversation.
 *
 * @param conversation - Existing conversation (wrapped object, array, or nothing).
 * @param prompt - Content entries to append.
 * @returns A new array: the unwrapped conversation (or the value itself / an empty
 *   array when unwrapping yields nothing) followed by `prompt`.
 */
function updateConversation(conversation, prompt) {
    // Unwrap array if wrapped, otherwise treat as array
    const unwrapped = unwrapConversationArray(conversation);
    const convArray = unwrapped ?? (conversation || []);
    return convArray.concat(prompt);
}

const SYSTEM_KEY = '_llumiverse_system';

/**
 * Extract the stored system instruction from a Gemini conversation object.
 * Returns undefined if no system was stored.
 */
function extractSystemFromConversation(conversation) {
    if (typeof conversation === 'object' && conversation !== null) {
        const c = conversation;
        if (c[SYSTEM_KEY] && typeof c[SYSTEM_KEY] === 'object') {
            return c[SYSTEM_KEY];
        }
    }
    return undefined;
}

/**
 * Store the system instruction in the Gemini conversation wrapper object.
 * The conversation is already wrapped by incrementConversationTurn into
 * { _arrayConversation: Content[], _llumiverse_meta: {...} }.
 * We add _llumiverse_system alongside these fields.
 *
 * Returns the conversation unchanged when there is no system or it is not an object.
 */
function storeSystemInConversation(conversation, system) {
    if (!system)
        return conversation;
    if (typeof conversation === 'object' && conversation !== null) {
        return { ...conversation, [SYSTEM_KEY]: system };
    }
    return conversation;
}

/**
 * Gemini supports JSON output in the response, so we test if the response is a valid
 * JSON object; otherwise we treat the response as a string.
 *
 * This is an excerpt from googleapis.github.io/python-genai:
 *
 * The function response in JSON object format.
 * Use "output" key to specify function output and "error" key to specify error details (if any).
 * If "output" and "error" keys are not specified, then whole "response" is treated as function output.
 * @see https://googleapis.github.io/python-genai/genai.html#genai.types.FunctionResponse
 *
 * @param response - Tool output as a string.
 * @returns The parsed object when the trimmed text is brace-delimited valid JSON,
 *   otherwise `{ output: <trimmed text> }`.
 */
function formatFunctionResponse(response) {
    response = response.trim();
    if (response.startsWith("{") && response.endsWith("}")) {
        try {
            return JSON.parse(response);
        }
        catch {
            // Looks like an object but is not valid JSON: pass it through as plain output.
            return { output: response };
        }
    }
    return { output: response };
}
//# sourceMappingURL=gemini.js.map