@quantumai/quantum-cli-core
Quantum CLI Core - Multi-LLM Collaboration System
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { getFolderStructure } from '../utils/getFolderStructure.js';
import { Turn, GeminiEventType, } from './turn.js';
import { getCoreSystemPrompt, getCompressionPrompt } from './prompts.js';
import { getResponseText } from '../utils/generateContentResponseUtilities.js';
import { checkNextSpeaker } from '../utils/nextSpeakerChecker.js';
import { reportError } from '../utils/errorReporting.js';
import { GeminiChat } from './geminiChat.js';
import { retryWithBackoff } from '../utils/retry.js';
import { getErrorMessage } from '../utils/errors.js';
import { tokenLimit } from './tokenLimits.js';
import { AuthType, createContentGenerator, } from './contentGenerator.js';
import { ProxyAgent, setGlobalDispatcher } from 'undici';
import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
import { CollaborationEngine } from '../collaboration/collaboration-engine.js';
import { UncertaintyDetector } from '../collaboration/detection/uncertainty-detector.js';
import { AutoTriggerSystem, createDefaultAutoTriggerConfig, } from '../collaboration/detection/auto-trigger.js';
import { isThinkingSupported, findIndexAfterFraction, } from '../utils/model-utils.js';
export { findIndexAfterFraction } from '../utils/model-utils.js';
export class GeminiClient {
config;
chat;
contentGenerator;
embeddingModel;
generateContentConfig = {
temperature: 0,
topP: 1,
};
MAX_TURNS = 100;
/**
* Threshold for compression, expressed as a fraction of the model's token limit.
* If the chat history's token count exceeds this fraction, the history will be compressed.
*/
COMPRESSION_TOKEN_THRESHOLD = 0.7;
/**
* The fraction of the latest chat history to keep. A value of 0.3
* means that only the last 30% of the chat history will be kept after compression.
*/
COMPRESSION_PRESERVE_THRESHOLD = 0.3;
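// Compression example (derived from the two constants above): with a 1,000,000-token
// model limit, compression is attempted once the curated history exceeds ~700,000 tokens,
// and roughly the most recent 30% of the history is carried over verbatim while the
// older portion is summarized.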
collaborationEngine;
uncertaintyDetector;
autoTriggerSystem;
constructor(config) {
this.config = config;
if (config.getProxy()) {
setGlobalDispatcher(new ProxyAgent(config.getProxy()));
}
this.embeddingModel = config.getEmbeddingModel();
// New: Initialize CollaborationEngine if collaboration is enabled
const collaborationConfig = this.config.getCollaborationConfig();
if (collaborationConfig?.enabled) {
this.collaborationEngine = new CollaborationEngine(this.config);
this.uncertaintyDetector = new UncertaintyDetector();
// Initialize AutoTriggerSystem with the default config, then apply overrides from the collaboration config
const autoTriggerConfig = createDefaultAutoTriggerConfig();
if (collaborationConfig.autoVerifyThreshold !== undefined) {
// The numeric threshold from the config is used directly as the verification trigger level
autoTriggerConfig.uncertaintyThresholds.verify =
collaborationConfig.autoVerifyThreshold;
}
if (collaborationConfig.maxCostPerQuery !== undefined &&
collaborationConfig.maxCostPerQuery > 0) {
// Derive a monthly call budget from the per-query cost cap (assumes an overall budget of 1000 cost units)
autoTriggerConfig.costLimitation.monthlyLimit = Math.floor(1000 / collaborationConfig.maxCostPerQuery);
}
this.autoTriggerSystem = new AutoTriggerSystem(autoTriggerConfig);
}
}
async initialize(contentGeneratorConfig) {
this.contentGenerator = await createContentGenerator(contentGeneratorConfig, this.config.getSessionId());
this.chat = await this.startChat();
}
getContentGenerator() {
if (!this.contentGenerator) {
throw new Error('Content generator not initialized');
}
return this.contentGenerator;
}
async addHistory(content) {
this.getChat().addHistory(content);
}
getChat() {
if (!this.chat) {
throw new Error('Chat not initialized');
}
return this.chat;
}
getHistory() {
return this.getChat().getHistory();
}
setHistory(history) {
this.getChat().setHistory(history);
}
async resetChat() {
this.chat = await this.startChat();
}
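/**
* Builds the initial context parts for a new chat: today's date, the platform,
* the working directory and a folder-structure overview, plus the contents of
* all matching files (via the `read_many_files` tool) when the full-context
* flag is set.
*/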
async getEnvironment() {
const cwd = this.config.getWorkingDir();
const today = new Date().toLocaleDateString(undefined, {
weekday: 'long',
year: 'numeric',
month: 'long',
day: 'numeric',
});
const platform = process.platform;
const folderStructure = await getFolderStructure(cwd, {
fileService: this.config.getFileService(),
});
const context = `
This is the Gemini CLI. We are setting up the context for our chat.
Today's date is ${today}.
My operating system is: ${platform}
I'm currently working in the directory: ${cwd}
${folderStructure}
`.trim();
const initialParts = [{ text: context }];
const toolRegistry = await this.config.getToolRegistry();
// Add full file context if the flag is set
if (this.config.getFullContext()) {
try {
const readManyFilesTool = toolRegistry.getTool('read_many_files');
if (readManyFilesTool) {
// Read all files in the target directory
const result = await readManyFilesTool.execute({
paths: ['**/*'], // Read everything recursively
useDefaultExcludes: true, // Use default excludes
}, AbortSignal.timeout(30000));
if (result.llmContent) {
initialParts.push({
text: `\n--- Full File Context ---\n${result.llmContent}`,
});
}
else {
console.warn('Full context requested, but read_many_files returned no content.');
}
}
else {
console.warn('Full context requested, but read_many_files tool not found.');
}
}
catch (error) {
// Not using reportError here as it's a startup/config phase, not a chat/generation phase error.
console.error('Error reading full file context:', error);
initialParts.push({
text: '\n--- Error reading full file context ---',
});
}
}
return initialParts;
}
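/**
* Starts a new GeminiChat session seeded with the environment context, the core
* system prompt (including user memory), the registered tool declarations,
* thinking config when the model supports it, and any extra history supplied
* by the caller.
*/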
async startChat(extraHistory) {
const envParts = await this.getEnvironment();
const toolRegistry = await this.config.getToolRegistry();
const toolDeclarations = toolRegistry.getFunctionDeclarations();
const tools = [{ functionDeclarations: toolDeclarations }];
const history = [
{
role: 'user',
parts: envParts,
},
{
role: 'model',
parts: [{ text: 'Got it. Thanks for the context!' }],
},
...(extraHistory ?? []),
];
try {
const userMemory = this.config.getUserMemory();
const systemInstruction = getCoreSystemPrompt(userMemory);
const generateContentConfigWithThinking = isThinkingSupported(this.config.getModel())
? {
...this.generateContentConfig,
thinkingConfig: {
includeThoughts: true,
},
}
: this.generateContentConfig;
return new GeminiChat(this.config, this.getContentGenerator(), {
systemInstruction,
...generateContentConfigWithThinking,
tools,
}, history);
}
catch (error) {
await reportError(error, 'Error initializing Gemini chat session.', history, 'startChat');
throw new Error(`Failed to initialize chat: ${getErrorMessage(error)}`);
}
}
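/**
* Requests a JSON response that conforms to `schema` (sent as responseSchema with
* responseMimeType 'application/json') and parses the returned text.
*
* @example
* // Illustrative sketch; the exact schema shape depends on the configured
* // content generator's responseSchema format.
* const controller = new AbortController();
* const data = await client.generateJson(
*   [{ role: 'user', parts: [{ text: 'Summarize this repo in one sentence.' }] }],
*   { type: 'object', properties: { summary: { type: 'string' } } },
*   controller.signal,
* );
*/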
async generateJson(contents, schema, abortSignal, model = DEFAULT_GEMINI_FLASH_MODEL, config = {}) {
try {
const userMemory = this.config.getUserMemory();
const systemInstruction = getCoreSystemPrompt(userMemory);
const requestConfig = {
abortSignal,
...this.generateContentConfig,
...config,
};
const apiCall = () => this.getContentGenerator().generateContent({
model,
config: {
...requestConfig,
systemInstruction,
responseSchema: schema,
responseMimeType: 'application/json',
},
contents,
});
const result = await retryWithBackoff(apiCall, {
onPersistent429: async (authType) => await this.handleFlashFallback(authType),
authType: this.config.getContentGeneratorConfig()?.authType,
});
const text = getResponseText(result);
if (!text) {
const error = new Error('API returned an empty response for generateJson.');
await reportError(error, 'Error in generateJson: API returned an empty response.', contents, 'generateJson-empty-response');
throw error;
}
try {
return JSON.parse(text);
}
catch (parseError) {
await reportError(parseError, 'Failed to parse JSON response from generateJson.', {
responseTextFailedToParse: text,
originalRequestContents: contents,
}, 'generateJson-parse');
throw new Error(`Failed to parse API response as JSON: ${getErrorMessage(parseError)}`);
}
}
catch (error) {
if (abortSignal.aborted) {
throw error;
}
// Avoid double reporting for the empty response case handled above
if (error instanceof Error &&
error.message === 'API returned an empty response for generateJson.') {
throw error;
}
await reportError(error, 'Error generating JSON content via API.', contents, 'generateJson-api');
throw new Error(`Failed to generate JSON content: ${getErrorMessage(error)}`);
}
}
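/**
* Generates content for the given request. When collaboration is enabled, a
* primary response is generated first and scanned for uncertainty; if the
* auto-trigger system decides verification or comparison is warranted, the
* CollaborationEngine's result is returned instead of the primary response.
*/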
async generateContent(contents, generationConfig, abortSignal) {
// New: Decide whether multi-LLM collaboration should be auto-triggered for this request
const contentText = this.extractTextFromContents(contents);
if (this.collaborationEngine &&
this.uncertaintyDetector &&
this.autoTriggerSystem &&
contentText) {
// First, generate with primary model to analyze uncertainty
const primaryResponse = await this.generatePrimaryResponse(contents, generationConfig, abortSignal);
const primaryText = primaryResponse.text || '';
// Detect uncertainty in the response
const uncertaintyResult = this.uncertaintyDetector.detect(primaryText);
// Check if collaboration should be triggered
const triggerDecision = this.autoTriggerSystem.shouldTriggerVerification(contentText, uncertaintyResult);
if (triggerDecision.shouldVerify || triggerDecision.shouldCompare) {
console.log(`Auto-triggering collaboration: ${triggerDecision.reason}`);
this.autoTriggerSystem.incrementUsage();
const verifiedResponse = triggerDecision.shouldCompare
? await this.collaborationEngine.compareResponses(contentText)
: await this.collaborationEngine.generateWithVerification(contentText);
// Convert to GenerateContentResponse format
return {
candidates: [
{
content: {
parts: [{ text: verifiedResponse.content }],
role: 'model',
},
finishReason: 'STOP',
},
],
text: verifiedResponse.content,
data: undefined,
functionCalls: undefined,
executableCode: undefined,
codeExecutionResult: undefined,
};
}
// Use primary response if no collaboration needed
return primaryResponse;
}
const modelToUse = this.config.getModel();
const configToUse = {
...this.generateContentConfig,
...generationConfig,
};
try {
const userMemory = this.config.getUserMemory();
const systemInstruction = getCoreSystemPrompt(userMemory);
const requestConfig = {
abortSignal,
...configToUse,
systemInstruction,
};
const apiCall = () => this.getContentGenerator().generateContent({
model: modelToUse,
config: requestConfig,
contents,
});
const result = await retryWithBackoff(apiCall, {
onPersistent429: async (authType) => await this.handleFlashFallback(authType),
authType: this.config.getContentGeneratorConfig()?.authType,
});
return result;
}
catch (error) {
if (abortSignal.aborted) {
throw error;
}
await reportError(error, `Error generating content via API with model ${modelToUse}.`, {
requestContents: contents,
requestConfig: configToUse,
}, 'generateContent-api');
throw new Error(`Failed to generate content with model ${modelToUse}: ${getErrorMessage(error)}`);
}
}
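/**
* Embeds each input string with the configured embedding model and returns one
* vector of numbers per input, in the same order. Throws if the API returns no
* embeddings, a mismatched count, or an empty vector.
*/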
async generateEmbedding(texts) {
if (!texts || texts.length === 0) {
return [];
}
const embedModelParams = {
model: this.embeddingModel,
contents: texts,
};
const embedContentResponse = await this.getContentGenerator().embedContent(embedModelParams);
if (!embedContentResponse.embeddings ||
embedContentResponse.embeddings.length === 0) {
throw new Error('No embeddings found in API response.');
}
if (embedContentResponse.embeddings.length !== texts.length) {
throw new Error(`API returned a mismatched number of embeddings. Expected ${texts.length}, got ${embedContentResponse.embeddings.length}.`);
}
return embedContentResponse.embeddings.map((embedding, index) => {
const values = embedding.values;
if (!values || values.length === 0) {
throw new Error(`API returned an empty embedding for input text at index ${index}: "${texts[index]}"`);
}
return values;
});
}
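/**
* Compresses the chat history when its token count exceeds
* COMPRESSION_TOKEN_THRESHOLD of the model's limit (or unconditionally when
* `force` is true): the older portion of the history is summarized into a
* state snapshot and a fresh chat is started from that summary plus the most
* recent turns. Returns the original and new token counts, or null when
* nothing was compressed or a count could not be determined.
*/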
async tryCompressChat(force = false) {
const curatedHistory = this.getChat().getHistory(true);
// Regardless of `force`, don't do anything if the history is empty.
if (curatedHistory.length === 0) {
return null;
}
const model = this.config.getModel();
const { totalTokens: originalTokenCount } = await this.getContentGenerator().countTokens({
model,
contents: curatedHistory,
});
if (originalTokenCount === undefined) {
console.warn(`Could not determine token count for model ${model}.`);
return null;
}
// Don't compress if not forced and we are under the limit.
if (!force &&
originalTokenCount < this.COMPRESSION_TOKEN_THRESHOLD * tokenLimit(model)) {
return null;
}
let compressBeforeIndex = findIndexAfterFraction(curatedHistory, 1 - this.COMPRESSION_PRESERVE_THRESHOLD);
// Find the first user message after the index. This is the start of the next turn.
while (compressBeforeIndex < curatedHistory.length &&
curatedHistory[compressBeforeIndex]?.role !== 'user') {
compressBeforeIndex++;
}
const historyToCompress = curatedHistory.slice(0, compressBeforeIndex);
const historyToKeep = curatedHistory.slice(compressBeforeIndex);
this.getChat().setHistory(historyToCompress);
const { text: summary } = await this.getChat().sendMessage({
message: {
text: 'First, reason in your scratchpad. Then, generate the <state_snapshot>.',
},
config: {
systemInstruction: { text: getCompressionPrompt() },
},
});
this.chat = await this.startChat([
{
role: 'user',
parts: [{ text: summary }],
},
{
role: 'model',
parts: [{ text: 'Got it. Thanks for the additional context!' }],
},
...historyToKeep,
]);
const { totalTokens: newTokenCount } = await this.getContentGenerator().countTokens({
// model might change after calling `sendMessage`, so we get the newest value from config
model: this.config.getModel(),
contents: this.getChat().getHistory(),
});
if (newTokenCount === undefined) {
console.warn('Could not determine compressed history token count.');
return null;
}
return {
originalTokenCount,
newTokenCount,
};
}
/**
* Handles fallback to Flash model when persistent 429 errors occur for OAuth users.
* Uses a fallback handler if provided by the config, otherwise returns null.
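*
* @example
* // Hypothetical handler, as wired up by the CLI package: confirm with the
* // user and return true to accept switching to the Flash model.
* config.flashFallbackHandler = async (currentModel, fallbackModel) => {
*   console.warn(`Switching from ${currentModel} to ${fallbackModel} after repeated 429s.`);
*   return true;
* };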
*/
async handleFlashFallback(authType) {
// Only handle fallback for OAuth users
if (authType !== AuthType.LOGIN_WITH_GOOGLE) {
return null;
}
const currentModel = this.config.getModel();
const fallbackModel = DEFAULT_GEMINI_FLASH_MODEL;
// Don't fall back if we're already using the Flash model
if (currentModel === fallbackModel) {
return null;
}
// Check if config has a fallback handler (set by CLI package)
const fallbackHandler = this.config.flashFallbackHandler;
if (typeof fallbackHandler === 'function') {
try {
const accepted = await fallbackHandler(currentModel, fallbackModel);
if (accepted) {
this.config.setModel(fallbackModel);
return fallbackModel;
}
}
catch (error) {
console.warn('Flash fallback handler failed:', error);
}
}
return null;
}
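// Note: only the text of the first part of the first content entry is inspected,
// so multi-part or multi-turn requests bypass collaboration triggering.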
extractTextFromContents(contents) {
if (contents.length === 0) {
return null;
}
const firstContent = contents[0];
if (firstContent.parts && firstContent.parts.length > 0) {
const firstPart = firstContent.parts[0];
if ('text' in firstPart && firstPart.text) {
return firstPart.text;
}
}
return null;
}
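// Mirrors generateContent's direct path; used to produce the draft response that
// the uncertainty detector analyzes before deciding whether to collaborate.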
async generatePrimaryResponse(contents, generationConfig, abortSignal) {
const modelToUse = this.config.getModel();
const configToUse = {
...this.generateContentConfig,
...generationConfig,
};
try {
const userMemory = this.config.getUserMemory();
const systemInstruction = getCoreSystemPrompt(userMemory);
const requestConfig = {
abortSignal,
...configToUse,
systemInstruction,
};
const apiCall = () => this.getContentGenerator().generateContent({
model: modelToUse,
config: requestConfig,
contents,
});
const result = await retryWithBackoff(apiCall, {
onPersistent429: async (authType) => await this.handleFlashFallback(authType),
authType: this.config.getContentGeneratorConfig()?.authType,
});
return result;
}
catch (error) {
if (abortSignal.aborted) {
throw error;
}
await reportError(error, `Error generating primary content via API with model ${modelToUse}.`, {
requestContents: contents,
requestConfig: configToUse,
}, 'generatePrimaryResponse-api');
throw new Error(`Failed to generate primary content with model ${modelToUse}: ${getErrorMessage(error)}`);
}
}
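/**
* Streams a conversation turn as GeminiEventType events. When collaboration is
* enabled and `options.verify` is set, the verified response is yielded as a
* single ModelResponse event. Otherwise the history is compressed if needed,
* the turn is run, and, if the next-speaker check says the model should keep
* going, the stream recurses with a "Please continue." request until the
* bounded turn budget is exhausted.
*/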
async *sendMessageStream(request, signal, turns = this.MAX_TURNS, options) {
// Check if we should use collaboration
const firstPart = Array.isArray(request) ? request[0] : request;
const textContent = firstPart && typeof firstPart === 'object' && 'text' in firstPart
? firstPart.text
: '';
if (this.collaborationEngine && textContent && options?.verify) {
// Use collaboration engine for verification
const verifiedResponse = await this.collaborationEngine.generateWithVerification(textContent);
// Convert to stream events
yield {
type: GeminiEventType.ModelResponse,
value: {
candidates: [
{
content: {
parts: [{ text: verifiedResponse.content }],
role: 'model',
},
finishReason: 'STOP',
},
],
text: verifiedResponse.content,
},
};
return new Turn(this.getChat());
}
// Original implementation
const boundedTurns = Math.min(turns, this.MAX_TURNS);
if (!boundedTurns) {
return new Turn(this.getChat());
}
const compressed = await this.tryCompressChat();
if (compressed) {
yield { type: GeminiEventType.ChatCompressed, value: compressed };
}
const turn = new Turn(this.getChat());
const resultStream = turn.run(request, signal);
for await (const event of resultStream) {
yield event;
}
if (!turn.pendingToolCalls.length && signal && !signal.aborted) {
const nextSpeakerCheck = await checkNextSpeaker(this.getChat(), this, signal);
if (nextSpeakerCheck?.next_speaker === 'model') {
const nextRequest = [{ text: 'Please continue.' }];
yield* this.sendMessageStream(nextRequest, signal, boundedTurns - 1, options);
}
}
return turn;
}
// New: Collaboration utility methods
getCollaborationStatus() {
const config = this.config.getCollaborationConfig();
if (!config?.enabled || !this.autoTriggerSystem) {
return { enabled: false };
}
return {
enabled: true,
costStatus: this.autoTriggerSystem.getCostStatus(),
};
}
updateCollaborationSettings(settings) {
if (!this.autoTriggerSystem) {
return;
}
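// Note: this rebuilds the auto-trigger config from defaults, so overrides applied
// in the constructor (autoVerifyThreshold, maxCostPerQuery) are not carried over.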
const currentConfig = createDefaultAutoTriggerConfig();
if (settings.aggressiveness) {
currentConfig.userPreferences.aggressiveness = settings.aggressiveness;
}
if (settings.enableCostAwareMode !== undefined) {
currentConfig.userPreferences.enableCostAwareMode =
settings.enableCostAwareMode;
}
this.autoTriggerSystem.updateConfig(currentConfig);
}
}
//# sourceMappingURL=client.js.map