@quantumai/quantum-cli-core

Quantum CLI Core - Multi-LLM Collaboration System

/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
import { getFolderStructure } from '../utils/getFolderStructure.js';
import { Turn, GeminiEventType } from './turn.js';
import { getCoreSystemPrompt, getCompressionPrompt } from './prompts.js';
import { getResponseText } from '../utils/generateContentResponseUtilities.js';
import { checkNextSpeaker } from '../utils/nextSpeakerChecker.js';
import { reportError } from '../utils/errorReporting.js';
import { GeminiChat } from './geminiChat.js';
import { retryWithBackoff } from '../utils/retry.js';
import { getErrorMessage } from '../utils/errors.js';
import { tokenLimit } from './tokenLimits.js';
import { AuthType, createContentGenerator } from './contentGenerator.js';
import { ProxyAgent, setGlobalDispatcher } from 'undici';
import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
import { CollaborationEngine } from '../collaboration/collaboration-engine.js';
import { UncertaintyDetector } from '../collaboration/detection/uncertainty-detector.js';
import { AutoTriggerSystem, createDefaultAutoTriggerConfig } from '../collaboration/detection/auto-trigger.js';
import { isThinkingSupported, findIndexAfterFraction } from '../utils/model-utils.js';
export { findIndexAfterFraction } from '../utils/model-utils.js';

export class GeminiClient {
  config;
  chat;
  contentGenerator;
  embeddingModel;
  generateContentConfig = {
    temperature: 0,
    topP: 1,
  };
  MAX_TURNS = 100;
  /**
   * Threshold for compression token count as a fraction of the model's token limit.
   * If the chat history exceeds this threshold, it will be compressed.
   */
  COMPRESSION_TOKEN_THRESHOLD = 0.7;
  /**
   * The fraction of the latest chat history to keep. A value of 0.3
   * means that only the last 30% of the chat history will be kept after compression.
   */
  COMPRESSION_PRESERVE_THRESHOLD = 0.3;
  collaborationEngine;
  uncertaintyDetector;
  autoTriggerSystem;

  constructor(config) {
    this.config = config;
    if (config.getProxy()) {
      setGlobalDispatcher(new ProxyAgent(config.getProxy()));
    }
    this.embeddingModel = config.getEmbeddingModel();
    // New: Initialize CollaborationEngine if collaboration is enabled
    const collaborationConfig = this.config.getCollaborationConfig();
    if (collaborationConfig?.enabled) {
      this.collaborationEngine = new CollaborationEngine(this.config);
      this.uncertaintyDetector = new UncertaintyDetector();
      // Initialize AutoTriggerSystem with default config and override from collaboration config
      const autoTriggerConfig = createDefaultAutoTriggerConfig();
      if (collaborationConfig.autoVerifyThreshold !== undefined) {
        autoTriggerConfig.uncertaintyThresholds.verify = collaborationConfig.autoVerifyThreshold; // Convert number to UncertaintyLevel
      }
      if (collaborationConfig.maxCostPerQuery !== undefined) {
        autoTriggerConfig.costLimitation.monthlyLimit = Math.floor(1000 / collaborationConfig.maxCostPerQuery);
      }
      this.autoTriggerSystem = new AutoTriggerSystem(autoTriggerConfig);
    }
  }

  async initialize(contentGeneratorConfig) {
    this.contentGenerator = await createContentGenerator(contentGeneratorConfig, this.config.getSessionId());
    this.chat = await this.startChat();
  }

  getContentGenerator() {
    if (!this.contentGenerator) {
      throw new Error('Content generator not initialized');
    }
    return this.contentGenerator;
  }

  async addHistory(content) {
    this.getChat().addHistory(content);
  }

  getChat() {
    if (!this.chat) {
      throw new Error('Chat not initialized');
    }
    return this.chat;
  }

  getHistory() {
    return this.getChat().getHistory();
  }

  setHistory(history) {
    this.getChat().setHistory(history);
  }

  async resetChat() {
    this.chat = await this.startChat();
  }

  async getEnvironment() {
    const cwd = this.config.getWorkingDir();
    const today = new Date().toLocaleDateString(undefined, {
      weekday: 'long',
      year: 'numeric',
      month: 'long',
      day: 'numeric',
    });
    const platform = process.platform;
    const folderStructure = await getFolderStructure(cwd, {
      fileService: this.config.getFileService(),
    });
    const context = `
This is the Gemini CLI. We are setting up the context for our chat.
Today's date is ${today}.
My operating system is: ${platform}
I'm currently working in the directory: ${cwd}
${folderStructure}
`.trim();
    const initialParts = [{ text: context }];
    const toolRegistry = await this.config.getToolRegistry();
    // Add full file context if the flag is set
    if (this.config.getFullContext()) {
      try {
        const readManyFilesTool = toolRegistry.getTool('read_many_files');
        if (readManyFilesTool) {
          // Read all files in the target directory
          const result = await readManyFilesTool.execute({
            paths: ['**/*'], // Read everything recursively
            useDefaultExcludes: true, // Use default excludes
          }, AbortSignal.timeout(30000));
          if (result.llmContent) {
            initialParts.push({
              text: `\n--- Full File Context ---\n${result.llmContent}`,
            });
          } else {
            console.warn('Full context requested, but read_many_files returned no content.');
          }
        } else {
          console.warn('Full context requested, but read_many_files tool not found.');
        }
      } catch (error) {
        // Not using reportError here as it's a startup/config phase, not a chat/generation phase error.
        console.error('Error reading full file context:', error);
        initialParts.push({
          text: '\n--- Error reading full file context ---',
        });
      }
    }
    return initialParts;
  }

  async startChat(extraHistory) {
    const envParts = await this.getEnvironment();
    const toolRegistry = await this.config.getToolRegistry();
    const toolDeclarations = toolRegistry.getFunctionDeclarations();
    const tools = [{ functionDeclarations: toolDeclarations }];
    const history = [
      {
        role: 'user',
        parts: envParts,
      },
      {
        role: 'model',
        parts: [{ text: 'Got it. Thanks for the context!' }],
      },
      ...(extraHistory ?? []),
    ];
    try {
      const userMemory = this.config.getUserMemory();
      const systemInstruction = getCoreSystemPrompt(userMemory);
      const generateContentConfigWithThinking = isThinkingSupported(this.config.getModel())
        ? {
            ...this.generateContentConfig,
            thinkingConfig: {
              includeThoughts: true,
            },
          }
        : this.generateContentConfig;
      return new GeminiChat(this.config, this.getContentGenerator(), {
        systemInstruction,
        ...generateContentConfigWithThinking,
        tools,
      }, history);
    } catch (error) {
      await reportError(error, 'Error initializing Gemini chat session.', history, 'startChat');
      throw new Error(`Failed to initialize chat: ${getErrorMessage(error)}`);
    }
  }

  async generateJson(contents, schema, abortSignal, model = DEFAULT_GEMINI_FLASH_MODEL, config = {}) {
    try {
      const userMemory = this.config.getUserMemory();
      const systemInstruction = getCoreSystemPrompt(userMemory);
      const requestConfig = {
        abortSignal,
        ...this.generateContentConfig,
        ...config,
      };
      const apiCall = () => this.getContentGenerator().generateContent({
        model,
        config: {
          ...requestConfig,
          systemInstruction,
          responseSchema: schema,
          responseMimeType: 'application/json',
        },
        contents,
      });
      const result = await retryWithBackoff(apiCall, {
        onPersistent429: async (authType) => await this.handleFlashFallback(authType),
        authType: this.config.getContentGeneratorConfig()?.authType,
      });
      const text = getResponseText(result);
      if (!text) {
        const error = new Error('API returned an empty response for generateJson.');
        await reportError(error, 'Error in generateJson: API returned an empty response.', contents, 'generateJson-empty-response');
        throw error;
      }
      try {
        return JSON.parse(text);
      } catch (parseError) {
        await reportError(parseError, 'Failed to parse JSON response from generateJson.', {
          responseTextFailedToParse: text,
          originalRequestContents: contents,
        }, 'generateJson-parse');
        throw new Error(`Failed to parse API response as JSON: ${getErrorMessage(parseError)}`);
      }
    } catch (error) {
      if (abortSignal.aborted) {
        throw error;
      }
      // Avoid double reporting for the empty response case handled above
      if (error instanceof Error && error.message === 'API returned an empty response for generateJson.') {
        throw error;
      }
      await reportError(error, 'Error generating JSON content via API.', contents, 'generateJson-api');
      throw new Error(`Failed to generate JSON content: ${getErrorMessage(error)}`);
    }
  }

  async generateContent(contents, generationConfig, abortSignal) {
    // New: Check if multi-LLM should be used with smart triggering
    const contentText = this.extractTextFromContents(contents);
    if (this.collaborationEngine && this.uncertaintyDetector && this.autoTriggerSystem && contentText) {
      // First, generate with primary model to analyze uncertainty
      const primaryResponse = await this.generatePrimaryResponse(contents, generationConfig, abortSignal);
      const primaryText = primaryResponse.text || '';
      // Detect uncertainty in the response
      const uncertaintyResult = this.uncertaintyDetector.detect(primaryText);
      // Check if collaboration should be triggered
      const triggerDecision = this.autoTriggerSystem.shouldTriggerVerification(contentText, uncertaintyResult);
      if (triggerDecision.shouldVerify || triggerDecision.shouldCompare) {
        console.log(`Auto-triggering collaboration: ${triggerDecision.reason}`);
        this.autoTriggerSystem.incrementUsage();
        const verifiedResponse = triggerDecision.shouldCompare
          ? await this.collaborationEngine.compareResponses(contentText)
          : await this.collaborationEngine.generateWithVerification(contentText);
        // Convert to GenerateContentResponse format
        return {
          candidates: [
            {
              content: {
                parts: [{ text: verifiedResponse.content }],
                role: 'model',
              },
              finishReason: 'STOP',
            },
          ],
          text: verifiedResponse.content,
          data: undefined,
          functionCalls: undefined,
          executableCode: undefined,
          codeExecutionResult: undefined,
        };
      }
      // Use primary response if no collaboration needed
      return primaryResponse;
    }
    const modelToUse = this.config.getModel();
    const configToUse = {
      ...this.generateContentConfig,
      ...generationConfig,
    };
    try {
      const userMemory = this.config.getUserMemory();
      const systemInstruction = getCoreSystemPrompt(userMemory);
      const requestConfig = {
        abortSignal,
        ...configToUse,
        systemInstruction,
      };
      const apiCall = () => this.getContentGenerator().generateContent({
        model: modelToUse,
        config: requestConfig,
        contents,
      });
      const result = await retryWithBackoff(apiCall, {
        onPersistent429: async (authType) => await this.handleFlashFallback(authType),
        authType: this.config.getContentGeneratorConfig()?.authType,
      });
      return result;
    } catch (error) {
      if (abortSignal.aborted) {
        throw error;
      }
      await reportError(error, `Error generating content via API with model ${modelToUse}.`, {
        requestContents: contents,
        requestConfig: configToUse,
      }, 'generateContent-api');
      throw new Error(`Failed to generate content with model ${modelToUse}: ${getErrorMessage(error)}`);
    }
  }

  async generateEmbedding(texts) {
    if (!texts || texts.length === 0) {
      return [];
    }
    const embedModelParams = {
      model: this.embeddingModel,
      contents: texts,
    };
    const embedContentResponse = await this.getContentGenerator().embedContent(embedModelParams);
    if (!embedContentResponse.embeddings || embedContentResponse.embeddings.length === 0) {
      throw new Error('No embeddings found in API response.');
    }
    if (embedContentResponse.embeddings.length !== texts.length) {
      throw new Error(`API returned a mismatched number of embeddings. Expected ${texts.length}, got ${embedContentResponse.embeddings.length}.`);
    }
    return embedContentResponse.embeddings.map((embedding, index) => {
      const values = embedding.values;
      if (!values || values.length === 0) {
        throw new Error(`API returned an empty embedding for input text at index ${index}: "${texts[index]}"`);
      }
      return values;
    });
  }

  async tryCompressChat(force = false) {
    const curatedHistory = this.getChat().getHistory(true);
    // Regardless of `force`, don't do anything if the history is empty.
    if (curatedHistory.length === 0) {
      return null;
    }
    const model = this.config.getModel();
    const { totalTokens: originalTokenCount } = await this.getContentGenerator().countTokens({
      model,
      contents: curatedHistory,
    });
    if (originalTokenCount === undefined) {
      console.warn(`Could not determine token count for model ${model}.`);
      return null;
    }
    // Don't compress if not forced and we are under the limit.
    if (!force && originalTokenCount < this.COMPRESSION_TOKEN_THRESHOLD * tokenLimit(model)) {
      return null;
    }
    let compressBeforeIndex = findIndexAfterFraction(curatedHistory, 1 - this.COMPRESSION_PRESERVE_THRESHOLD);
    // Find the first user message after the index. This is the start of the next turn.
    while (compressBeforeIndex < curatedHistory.length && curatedHistory[compressBeforeIndex]?.role !== 'user') {
      compressBeforeIndex++;
    }
    const historyToCompress = curatedHistory.slice(0, compressBeforeIndex);
    const historyToKeep = curatedHistory.slice(compressBeforeIndex);
    this.getChat().setHistory(historyToCompress);
    const { text: summary } = await this.getChat().sendMessage({
      message: {
        text: 'First, reason in your scratchpad. Then, generate the <state_snapshot>.',
      },
      config: {
        systemInstruction: { text: getCompressionPrompt() },
      },
    });
    this.chat = await this.startChat([
      {
        role: 'user',
        parts: [{ text: summary }],
      },
      {
        role: 'model',
        parts: [{ text: 'Got it. Thanks for the additional context!' }],
      },
      ...historyToKeep,
    ]);
    const { totalTokens: newTokenCount } = await this.getContentGenerator().countTokens({
      // model might change after calling `sendMessage`, so we get the newest value from config
      model: this.config.getModel(),
      contents: this.getChat().getHistory(),
    });
    if (newTokenCount === undefined) {
      console.warn('Could not determine compressed history token count.');
      return null;
    }
    return {
      originalTokenCount,
      newTokenCount,
    };
  }

  /**
   * Handles fallback to Flash model when persistent 429 errors occur for OAuth users.
   * Uses a fallback handler if provided by the config, otherwise returns null.
   */
  async handleFlashFallback(authType) {
    // Only handle fallback for OAuth users
    if (authType !== AuthType.LOGIN_WITH_GOOGLE) {
      return null;
    }
    const currentModel = this.config.getModel();
    const fallbackModel = DEFAULT_GEMINI_FLASH_MODEL;
    // Don't fallback if already using Flash model
    if (currentModel === fallbackModel) {
      return null;
    }
    // Check if config has a fallback handler (set by CLI package)
    const fallbackHandler = this.config.flashFallbackHandler;
    if (typeof fallbackHandler === 'function') {
      try {
        const accepted = await fallbackHandler(currentModel, fallbackModel);
        if (accepted) {
          this.config.setModel(fallbackModel);
          return fallbackModel;
        }
      } catch (error) {
        console.warn('Flash fallback handler failed:', error);
      }
    }
    return null;
  }

  extractTextFromContents(contents) {
    if (contents.length === 0) return null;
    const firstContent = contents[0];
    if (firstContent.parts && firstContent.parts.length > 0) {
      const firstPart = firstContent.parts[0];
      if ('text' in firstPart && firstPart.text) {
        return firstPart.text;
      }
    }
    return null;
  }

  async generatePrimaryResponse(contents, generationConfig, abortSignal) {
    const modelToUse = this.config.getModel();
    const configToUse = {
      ...this.generateContentConfig,
      ...generationConfig,
    };
    try {
      const userMemory = this.config.getUserMemory();
      const systemInstruction = getCoreSystemPrompt(userMemory);
      const requestConfig = {
        abortSignal,
        ...configToUse,
        systemInstruction,
      };
      const apiCall = () => this.getContentGenerator().generateContent({
        model: modelToUse,
        config: requestConfig,
        contents,
      });
      const result = await retryWithBackoff(apiCall, {
        onPersistent429: async (authType) => await this.handleFlashFallback(authType),
        authType: this.config.getContentGeneratorConfig()?.authType,
      });
      return result;
    } catch (error) {
      if (abortSignal.aborted) {
        throw error;
      }
      await reportError(error, `Error generating primary content via API with model ${modelToUse}.`, {
        requestContents: contents,
        requestConfig: configToUse,
      }, 'generatePrimaryResponse-api');
      throw new Error(`Failed to generate primary content with model ${modelToUse}: ${getErrorMessage(error)}`);
    }
  }

  async *sendMessageStream(request, signal, turns = this.MAX_TURNS, options) {
    // Check if we should use collaboration
    const firstPart = Array.isArray(request) ? request[0] : request;
    const textContent = firstPart && typeof firstPart === 'object' && 'text' in firstPart ? firstPart.text : '';
    if (this.collaborationEngine && textContent && options?.verify) {
      // Use collaboration engine for verification
      const verifiedResponse = await this.collaborationEngine.generateWithVerification(textContent);
      // Convert to stream events
      yield {
        type: GeminiEventType.ModelResponse,
        value: {
          candidates: [
            {
              content: {
                parts: [{ text: verifiedResponse.content }],
                role: 'model',
              },
              finishReason: 'STOP',
            },
          ],
          text: verifiedResponse.content,
        },
      };
      return new Turn(this.getChat());
    }
    // Original implementation
    const boundedTurns = Math.min(turns, this.MAX_TURNS);
    if (!boundedTurns) {
      return new Turn(this.getChat());
    }
    const compressed = await this.tryCompressChat();
    if (compressed) {
      yield { type: GeminiEventType.ChatCompressed, value: compressed };
    }
    const turn = new Turn(this.getChat());
    const resultStream = turn.run(request, signal);
    for await (const event of resultStream) {
      yield event;
    }
    if (!turn.pendingToolCalls.length && signal && !signal.aborted) {
      const nextSpeakerCheck = await checkNextSpeaker(this.getChat(), this, signal);
      if (nextSpeakerCheck?.next_speaker === 'model') {
        const nextRequest = [{ text: 'Please continue.' }];
        yield* this.sendMessageStream(nextRequest, signal, boundedTurns - 1, options);
      }
    }
    return turn;
  }

  // New: Collaboration utility methods
  getCollaborationStatus() {
    const config = this.config.getCollaborationConfig();
    if (!config?.enabled || !this.autoTriggerSystem) {
      return { enabled: false };
    }
    return {
      enabled: true,
      costStatus: this.autoTriggerSystem.getCostStatus(),
    };
  }

  updateCollaborationSettings(settings) {
    if (!this.autoTriggerSystem) return;
    const currentConfig = createDefaultAutoTriggerConfig();
    if (settings.aggressiveness) {
      currentConfig.userPreferences.aggressiveness = settings.aggressiveness;
    }
    if (settings.enableCostAwareMode !== undefined) {
      currentConfig.userPreferences.enableCostAwareMode = settings.enableCostAwareMode;
    }
    this.autoTriggerSystem.updateConfig(currentConfig);
  }
}
//# sourceMappingURL=client.js.map
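
Usage sketch (illustrative, not part of the file above): the snippet below shows one plausible way to drive GeminiClient, assuming a Config object that implements the accessors the class calls (getProxy, getEmbeddingModel, getCollaborationConfig, getWorkingDir, getToolRegistry, getModel, getUserMemory, getSessionId, and so on) and a content-generator config accepted by createContentGenerator. Neither type is defined in this file, so the shapes and import path shown here are assumptions rather than the package's documented API.

// Hypothetical usage sketch. `config` and `contentGeneratorConfig` are stand-ins;
// consult the package's config and contentGenerator modules for the real interfaces.
import { GeminiClient } from './core/client.js'; // illustrative import path

async function runOnce(config, contentGeneratorConfig) {
  // Collaboration is only wired up when config.getCollaborationConfig() returns
  // an object with enabled: true (see the constructor above).
  const client = new GeminiClient(config);
  await client.initialize(contentGeneratorConfig);

  // Stream one turn, aborting after 60 seconds.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), 60_000);
  try {
    const stream = client.sendMessageStream(
      [{ text: 'Summarize the files in this project.' }],
      controller.signal,
    );
    for await (const event of stream) {
      // Event types come from GeminiEventType in ./turn.js.
      console.log(event.type, event.value);
    }
  } finally {
    clearTimeout(timer);
  }

  // Optional: inspect collaboration status and cost tracking.
  console.log(client.getCollaborationStatus());
}

Passing an options object with verify: true as the fourth argument of sendMessageStream routes the turn through CollaborationEngine.generateWithVerification, as the collaboration branch at the top of that method shows.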