UNPKG

claude-flow

Version:

Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration

352 lines 14.6 kB
/**
 * GAIA Agent — Gemini 2.5 Pro Thinking adapter (Phase 2 pilot)
 *
 * Adapts the existing GAIA tool catalogue to Google's generateContent API,
 * translating Anthropic tool_use ↔ Gemini functionCall/functionResponse.
 *
 * Loop algorithm mirrors gaia-agent.ts:
 *   1. Build initial contents array with system instruction + first user turn.
 *   2. Call Gemini generateContent with functionDeclarations.
 *   3. On functionCall parts: execute tools, append functionResponse, repeat.
 *   4. On text-only response: scan for FINAL_ANSWER pattern.
 *   5. On maxTurns: return timedOut result.
 *
 * Gemini 2.5 Pro pricing (2026-05-28, per million tokens):
 *   Input:  $1.25 (≤200k tokens), $2.50 (>200k)
 *   Output: $10.00 (≤200k tokens), $15.00 (>200k)
 *   Thinking tokens are billed as output tokens.
 *
 * Cost gate for pilot: ≤$0.12 per question average.
 * Expected per-question cost at ~10k input + ~2k output: ~$0.032 — well within gate.
 *
 * Refs: ADR-133, ADR-135, #2156 Phase 2
 */
import { execSync } from 'node:child_process';
import * as fs from 'node:fs';
import * as path from 'node:path';

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
const GEMINI_API_BASE = 'https://generativelanguage.googleapis.com/v1beta/models';
export const DEFAULT_GEMINI_MODEL = 'gemini-2.5-pro';
const DEFAULT_MAX_TURNS = 12;
const DEFAULT_MAX_TOKENS_PER_TURN = 8192;
const DEFAULT_PER_TURN_TIMEOUT_MS = 120_000;
/** Thinking budget sent with every request (generationConfig.thinkingConfig). */
const THINKING_BUDGET_TOKENS = 8192;
/** Gemini 2.5 Pro pricing (USD per million tokens, ≤200k bracket). */
const GEMINI_INPUT_COST_PER_M = 1.25;
const GEMINI_OUTPUT_COST_PER_M = 10.00;
const FINAL_ANSWER_RE = /FINAL_ANSWER:\s*(.+)/i;
/** JSON-schema type name → Gemini schema type name. */
const GEMINI_TYPE_BY_JSON_TYPE = Object.freeze({
    string: 'STRING',
    number: 'NUMBER',
    integer: 'INTEGER',
    boolean: 'BOOLEAN',
    array: 'ARRAY',
    object: 'OBJECT',
});
/** File extensions sent inline as images, with their MIME types. */
const IMAGE_MIME_TYPES = Object.freeze({
    '.png': 'image/png',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.gif': 'image/gif',
    '.webp': 'image/webp',
});

// ---------------------------------------------------------------------------
// API key resolution
// ---------------------------------------------------------------------------
/**
 * Resolve the Gemini API key.
 *
 * Lookup order: the explicit argument, the GOOGLE_AI_API_KEY environment
 * variable, then the `gcloud` CLI (Secret Manager, project ruv-dev).
 *
 * @param {string} [apiKey] - Explicit key; wins when non-blank.
 * @returns {string} The trimmed API key.
 * @throws {Error} When no source yields a non-empty key.
 */
export function resolveGeminiApiKey(apiKey) {
    if (apiKey && apiKey.trim()) {
        return apiKey.trim();
    }
    const envKey = process.env['GOOGLE_AI_API_KEY'];
    if (envKey && envKey.trim()) {
        return envKey.trim();
    }
    try {
        const out = execSync('gcloud secrets versions access latest --secret=GOOGLE_AI_API_KEY --project=ruv-dev 2>/dev/null', { encoding: 'utf-8', timeout: 10_000 }).trim();
        if (out) {
            return out;
        }
    }
    catch {
        // Best-effort lookup: gcloud may be missing or unauthenticated.
        // Fall through to the explicit error below.
    }
    throw new Error('GOOGLE_AI_API_KEY not found. Set the env var or store in GCP Secret Manager.');
}

// ---------------------------------------------------------------------------
// Tool definition translation (Anthropic input_schema → Gemini declaration)
// ---------------------------------------------------------------------------
/**
 * Map a JSON-schema type name to the Gemini schema type name.
 *
 * @param {unknown} anthropicType - Type name such as "string" or "Array".
 * @returns {string} Gemini type; 'STRING' for unknown, missing or non-string input.
 */
function toGeminiType(anthropicType) {
    if (typeof anthropicType !== 'string') {
        return 'STRING';
    }
    const key = anthropicType.toLowerCase();
    // Own-property check so names like "constructor" do not resolve to
    // members inherited from Object.prototype.
    return Object.hasOwn(GEMINI_TYPE_BY_JSON_TYPE, key) ? GEMINI_TYPE_BY_JSON_TYPE[key] : 'STRING';
}
/**
 * Translate one property schema, keeping array `items` and nested object
 * `properties`/`required` when the source schema provides them.
 *
 * @param {object} [schema] - Property schema with optional type/description.
 * @returns {object} Gemini schema object.
 */
function translateSchema(schema) {
    const translated = {
        type: toGeminiType(schema?.type),
        description: schema?.description,
    };
    if (translated.type === 'ARRAY' && schema.items && typeof schema.items === 'object') {
        translated.items = translateSchema(schema.items);
    }
    if (translated.type === 'OBJECT' && schema.properties && typeof schema.properties === 'object') {
        translated.properties = Object.fromEntries(Object.entries(schema.properties).map(([key, val]) => [key, translateSchema(val)]));
        if (Array.isArray(schema.required)) {
            translated.required = schema.required;
        }
    }
    return translated;
}
/**
 * Translate an Anthropic-style tool definition into a Gemini function
 * declaration.
 *
 * @param {{name: string, description: string, input_schema?: object}} def
 * @returns {{name: string, description: string, parameters: object}}
 */
function translateToolDef(def) {
    // Tools without parameters may omit `properties` (or the whole schema).
    const sourceProps = def.input_schema?.properties ?? {};
    const props = {};
    for (const [key, val] of Object.entries(sourceProps)) {
        props[key] = translateSchema(val);
    }
    return {
        name: def.name,
        description: def.description,
        parameters: {
            type: 'OBJECT',
            properties: props,
            required: def.input_schema?.required ?? [],
        },
    };
}

// ---------------------------------------------------------------------------
// Single Gemini API call
// ---------------------------------------------------------------------------
/**
 * Perform one generateContent request.
 *
 * @param {string} apiKey - Gemini API key (sent as a header, never in the URL).
 * @param {string} model - Model id appended to the API base URL.
 * @param {Array<object>} contents - Conversation so far.
 * @param {string} systemInstruction - System prompt text.
 * @param {Array<object>} functionDeclarations - Tool declarations; may be empty.
 * @param {number} maxTokens - Value for generationConfig.maxOutputTokens.
 * @param {number} timeoutMs - Abort the request after this many milliseconds.
 * @returns {Promise<object>} Parsed JSON response body.
 * @throws {Error} On a non-2xx status, or when fetch / body parsing fails
 *   (including the abort raised by the timeout).
 */
async function callGemini(apiKey, model, contents, systemInstruction, functionDeclarations, maxTokens, timeoutMs) {
    const url = `${GEMINI_API_BASE}/${model}:generateContent`;
    const requestBody = {
        contents,
        systemInstruction: { parts: [{ text: systemInstruction }] },
        generationConfig: {
            maxOutputTokens: maxTokens,
            thinkingConfig: { thinkingBudget: THINKING_BUDGET_TOKENS },
        },
    };
    if (functionDeclarations.length > 0) {
        requestBody['tools'] = [{ functionDeclarations }];
        requestBody['toolConfig'] = { functionCallingConfig: { mode: 'AUTO' } };
    }
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeoutMs);
    try {
        const res = await fetch(url, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                // Header auth keeps the key out of URLs that proxies may log.
                'x-goog-api-key': apiKey,
            },
            body: JSON.stringify(requestBody),
            signal: controller.signal,
        });
        if (!res.ok) {
            const errText = await res.text().catch(() => '<unreadable>');
            throw new Error(`Gemini API ${res.status}: ${errText.slice(0, 400)}`);
        }
        return await res.json();
    }
    finally {
        // Cleared only after the body is consumed so the timeout also covers
        // a response that stalls after the headers arrive.
        clearTimeout(timer);
    }
}

// ---------------------------------------------------------------------------
// Build initial contents (handles image attachments inline)
// ---------------------------------------------------------------------------
/**
 * Build the first user turn for a question.
 *
 * Image attachments (see IMAGE_MIME_TYPES) are read from disk and inlined as
 * base64; any other attachment is only mentioned by path so the model can
 * fetch it with the file_read tool.
 *
 * @param {{question: string, file_path?: string}} question
 * @returns {Array<{role: string, parts: Array<object>}>} Single-element contents array.
 */
function buildInitialContents(question) {
    const questionText = question.question;
    const filePath = question.file_path;
    if (!filePath) {
        return [{ role: 'user', parts: [{ text: questionText }] }];
    }
    const parts = [];
    const mimeType = IMAGE_MIME_TYPES[path.extname(filePath).toLowerCase()];
    if (mimeType) {
        try {
            const buf = fs.readFileSync(filePath);
            parts.push({ text: questionText });
            // Gemini inline image part
            parts.push({ inlineData: { mimeType, data: buf.toString('base64') } });
        }
        catch {
            // Unreadable image: degrade to a textual pointer instead of failing the run.
            parts.push({
                text: questionText + `\n\nAttached file: ${filePath}. Call file_read to read it.`,
            });
        }
    }
    else {
        parts.push({
            text: questionText +
                `\n\nThis question has an attached file. Call file_read with path="${filePath}" to read it, then answer.`,
        });
    }
    return [{ role: 'user', parts }];
}

// ---------------------------------------------------------------------------
// Extract FINAL_ANSWER from model response text
// ---------------------------------------------------------------------------
/**
 * Return the text following the first "FINAL_ANSWER:" marker found in the
 * candidate's text parts, ignoring parts flagged as `thought`.
 *
 * @param {object} [candidate] - One response candidate.
 * @returns {string|null} Trimmed answer, or null when no marker is present.
 */
function extractFinalAnswer(candidate) {
    const parts = candidate?.content?.parts;
    if (!parts) {
        return null;
    }
    for (const part of parts) {
        if (part.thought || !part.text) {
            continue;
        }
        const match = FINAL_ANSWER_RE.exec(part.text);
        if (match?.[1]) {
            return match[1].trim();
        }
    }
    return null;
}

// ---------------------------------------------------------------------------
// Check if response has any functionCall parts
// ---------------------------------------------------------------------------
/**
 * Collect the functionCall payloads of a candidate.
 *
 * @param {object} [candidate] - One response candidate.
 * @returns {Array<{name: string, args?: object}>} Possibly empty list.
 */
function getFunctionCalls(candidate) {
    const parts = candidate?.content?.parts;
    if (!parts) {
        return [];
    }
    return parts.filter((p) => !!p.functionCall).map((p) => p.functionCall);
}

// ---------------------------------------------------------------------------
// Main agent loop
// ---------------------------------------------------------------------------
/**
 * Run the tool-using agent loop for one question.
 *
 * @param {{task_id: string, question: string, file_path?: string}} question
 * @param {object} [options]
 * @param {string} [options.model] - Defaults to DEFAULT_GEMINI_MODEL.
 * @param {number} [options.maxTurns] - Maximum model calls before giving up.
 * @param {number} [options.maxTokensPerTurn] - maxOutputTokens per call.
 * @param {number} [options.perTurnTimeoutMs] - Timeout for each model call.
 * @param {string} [options.apiKey] - Overrides env / Secret Manager lookup.
 * @param {Array<object>} [options.catalogue] - Tools; each entry is used via
 *   its `name`, `definition` and `execute(args)` members.
 * @returns {Promise<object>} Result with questionId, finalAnswer, turns,
 *   toolCallsByName, token totals, wallMs, estimatedCostUsd and, depending on
 *   the outcome, `error` or `timedOut: true`.
 * @throws {Error} When no API key can be resolved.
 */
export async function runGeminiAgent(question, options = {}) {
    const { model = DEFAULT_GEMINI_MODEL, maxTurns = DEFAULT_MAX_TURNS, maxTokensPerTurn = DEFAULT_MAX_TOKENS_PER_TURN, perTurnTimeoutMs = DEFAULT_PER_TURN_TIMEOUT_MS, apiKey: suppliedKey, catalogue: suppliedCatalogue, } = options;
    const wallStart = Date.now();
    const apiKey = resolveGeminiApiKey(suppliedKey);
    // The default catalogue is loaded lazily so callers that pass their own
    // tools do not pay for loading every tool module.
    const catalogue = suppliedCatalogue ??
        (await import('./gaia-tools/index.js')).createDefaultToolCatalogue();
    const functionDeclarations = catalogue.map((t) => translateToolDef(t.definition));
    const systemInstruction = [
        'You are a precise question-answering agent. Answer the user\'s question using the tools available.',
        '',
        'RULES:',
        '1. Use tools when you need information you do not have with certainty.',
        '2. When you have a final answer, output it on its own line in this EXACT format:',
        ' FINAL_ANSWER: <your answer here>',
        '3. Keep answers concise. For numbers, give just the number. For names, give just the name.',
        '4. Do not include units unless the question specifically asks for them.',
        '5. MANDATORY: You MUST ALWAYS end your final response with a FINAL_ANSWER line.',
        '6. If the question text appears garbled or reversed, try to interpret it before concluding you cannot answer.',
    ].join('\n');
    const toolCallsByName = {};
    let totalInputTokens = 0;
    let totalOutputTokens = 0;
    let totalThinkingTokens = 0;
    let turns = 0;
    const contents = buildInitialContents(question);

    // Single place that snapshots the counters, so every exit path reports
    // cost the same way (thinking tokens are billed as output tokens).
    const buildResult = (finalAnswer, extra = {}) => ({
        questionId: question.task_id,
        finalAnswer,
        turns,
        toolCallsByName,
        totalInputTokens,
        totalOutputTokens,
        totalThinkingTokens,
        wallMs: Date.now() - wallStart,
        estimatedCostUsd: estimateCost(totalInputTokens, totalOutputTokens + totalThinkingTokens),
        ...extra,
    });

    // Run one requested tool call; failures are reported back to the model
    // as an `error` payload instead of aborting the loop.
    const runToolCall = async (call) => {
        toolCallsByName[call.name] = (toolCallsByName[call.name] ?? 0) + 1;
        const tool = catalogue.find((t) => t.name === call.name);
        if (!tool) {
            return {
                functionResponse: {
                    name: call.name,
                    response: { error: `Unknown tool "${call.name}"` },
                },
            };
        }
        try {
            // A call to a parameterless function may carry no `args`.
            const output = await tool.execute(call.args ?? {});
            return { functionResponse: { name: call.name, response: { output } } };
        }
        catch (err) {
            return {
                functionResponse: {
                    name: call.name,
                    response: { error: err instanceof Error ? err.message : String(err) },
                },
            };
        }
    };

    for (let turn = 0; turn < maxTurns; turn++) {
        turns = turn + 1;
        let resp;
        try {
            resp = await callGemini(apiKey, model, contents, systemInstruction, functionDeclarations, maxTokensPerTurn, perTurnTimeoutMs);
        }
        catch (err) {
            return buildResult(null, { error: err instanceof Error ? err.message : String(err) });
        }
        if (resp.error) {
            return buildResult(null, { error: `Gemini error ${resp.error.code}: ${resp.error.message}` });
        }
        // Accumulate token counts
        const usage = resp.usageMetadata ?? {};
        totalInputTokens += usage.promptTokenCount ?? 0;
        totalOutputTokens += usage.candidatesTokenCount ?? 0;
        totalThinkingTokens += usage.thoughtsTokenCount ?? 0;

        const candidate = resp.candidates?.[0];
        const functionCalls = getFunctionCalls(candidate);
        if (functionCalls.length === 0) {
            // No function calls — extract final answer
            return buildResult(extractFinalAnswer(candidate));
        }
        // Append model turn (with functionCall parts), then the tool results.
        contents.push({ role: 'model', parts: candidate.content.parts });
        const responseParts = await Promise.all(functionCalls.map(runToolCall));
        contents.push({ role: 'user', parts: responseParts });
    }
    // Exhausted maxTurns
    return buildResult(null, { timedOut: true });
}
/**
 * Estimate USD cost using the ≤200k-token pricing bracket.
 *
 * @param {number} inputTokens - Prompt tokens.
 * @param {number} outputTokens - Billable output tokens.
 * @returns {number} Estimated cost in USD.
 */
function estimateCost(inputTokens, outputTokens) {
    return ((inputTokens / 1_000_000) * GEMINI_INPUT_COST_PER_M +
        (outputTokens / 1_000_000) * GEMINI_OUTPUT_COST_PER_M);
}

// ---------------------------------------------------------------------------
// Answer matching (same as gaia-agent.ts)
// ---------------------------------------------------------------------------
/**
 * Lenient answer comparison: case-insensitive equality, substring containment
 * in either direction, or numeric agreement within 1% relative error.
 *
 * @param {string|null|undefined} modelAnswer - Answer produced by the model.
 * @param {string|number} expected - Reference answer.
 * @returns {boolean} True when the answers are considered a match.
 */
export function isGeminiAnswerCorrect(modelAnswer, expected) {
    if (!modelAnswer || expected == null) {
        return false;
    }
    const norm = (s) => String(s).trim().toLowerCase();
    const normModel = norm(modelAnswer);
    const normExpected = norm(expected);
    // A blank answer must never match: every string "includes" the empty string.
    if (normModel === '') {
        return false;
    }
    if (normModel === normExpected) {
        return true;
    }
    if (normExpected !== '' &&
        (normModel.includes(normExpected) || normExpected.includes(normModel))) {
        return true;
    }
    const numModel = parseFloat(normModel.replace(/[^0-9.\-]/g, ''));
    const numExpected = parseFloat(normExpected.replace(/[^0-9.\-]/g, ''));
    return (!Number.isNaN(numModel) &&
        !Number.isNaN(numExpected) &&
        numExpected !== 0 &&
        Math.abs((numModel - numExpected) / numExpected) < 0.01);
}
//# sourceMappingURL=gaia-agent-gemini.js.map