@quantumai/quantum-cli-core

Quantum CLI Core - Multi-LLM Collaboration System

/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
// DISCLAIMER: This is a copied version of https://github.com/googleapis/js-genai/blob/main/src/chats.ts
// with the intention of working around a key bug where function responses are not treated as
// "valid" responses: https://b.corp.google.com/issues/420354090
import { createUserContent } from '@google/genai';
import { retryWithBackoff } from '../utils/retry.js';
import { isFunctionResponse } from '../utils/messageInspectors.js';
import { AuthType } from './contentGenerator.js';
import { logApiRequest, logApiResponse, logApiError } from '../telemetry/loggers.js';
import {
    getStructuredResponse,
    getStructuredResponseFromParts,
} from '../utils/generateContentResponseUtilities.js';
import { ApiErrorEvent, ApiRequestEvent, ApiResponseEvent } from '../telemetry/types.js';
import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
/**
 * Returns true if the response is valid, false otherwise.
 */
function isValidResponse(response) {
    if (response.candidates === undefined || response.candidates.length === 0) {
        return false;
    }
    const content = response.candidates[0]?.content;
    if (content === undefined) {
        return false;
    }
    return isValidContent(content);
}
function isValidContent(content) {
    if (content.parts === undefined || content.parts.length === 0) {
        return false;
    }
    for (const part of content.parts) {
        if (part === undefined || Object.keys(part).length === 0) {
            return false;
        }
        if (!part.thought && part.text !== undefined && part.text === '') {
            return false;
        }
    }
    return true;
}
/**
 * Validates that the history contains the correct roles.
 *
 * @throws Error if the history does not start with a user turn.
 * @throws Error if the history contains an invalid role.
 */
function validateHistory(history) {
    for (const content of history) {
        if (content.role !== 'user' && content.role !== 'model') {
            throw new Error(`Role must be user or model, but got ${content.role}.`);
        }
    }
}
/**
 * Extracts the curated (valid) history from a comprehensive history.
 *
 * @remarks
 * The model may sometimes generate invalid or empty contents (e.g., due to safety
 * filters or recitation). Extracting valid turns from the history
 * ensures that subsequent requests can be accepted by the model.
 */
function extractCuratedHistory(comprehensiveHistory) {
    if (comprehensiveHistory === undefined || comprehensiveHistory.length === 0) {
        return [];
    }
    const curatedHistory = [];
    const length = comprehensiveHistory.length;
    let i = 0;
    while (i < length) {
        if (comprehensiveHistory[i].role === 'user') {
            curatedHistory.push(comprehensiveHistory[i]);
            i++;
        }
        else {
            const modelOutput = [];
            let isValid = true;
            while (i < length && comprehensiveHistory[i].role === 'model') {
                modelOutput.push(comprehensiveHistory[i]);
                if (isValid && !isValidContent(comprehensiveHistory[i])) {
                    isValid = false;
                }
                i++;
            }
            if (isValid) {
                curatedHistory.push(...modelOutput);
            }
            else {
                // Remove the last user input when model content is invalid.
                curatedHistory.pop();
            }
        }
    }
    return curatedHistory;
}
/**
 * Chat session that enables sending messages to the model with previous
 * conversation context.
 *
 * @remarks
 * The session maintains all the turns between user and model.
 */
export class GeminiChat {
    config;
    contentGenerator;
    generationConfig;
    history;
    // A promise to represent the current state of the message being sent to the
    // model.
    sendPromise = Promise.resolve();
    constructor(config, contentGenerator, generationConfig = {}, history = []) {
        this.config = config;
        this.contentGenerator = contentGenerator;
        this.generationConfig = generationConfig;
        this.history = history;
        validateHistory(history);
    }
    _getRequestTextFromContents(contents) {
        return contents
            .flatMap((content) => content.parts ?? [])
            .map((part) => part.text)
            .filter(Boolean)
            .join('');
    }
    async _logApiRequest(contents, model) {
        const requestText = this._getRequestTextFromContents(contents);
        logApiRequest(this.config, new ApiRequestEvent(model, requestText));
    }
    async _logApiResponse(durationMs, usageMetadata, responseText) {
        logApiResponse(this.config, new ApiResponseEvent(this.config.getModel(), durationMs, usageMetadata, responseText));
    }
    _logApiError(durationMs, error) {
        const errorMessage = error instanceof Error ? error.message : String(error);
        const errorType = error instanceof Error ? error.name : 'unknown';
        logApiError(this.config, new ApiErrorEvent(this.config.getModel(), errorMessage, durationMs, errorType));
    }
    /**
     * Handles fallback to the Flash model when persistent 429 errors occur for OAuth users.
     * Uses a fallback handler if provided by the config, otherwise returns null.
     */
    async handleFlashFallback(authType) {
        // Only handle fallback for OAuth users
        if (authType !== AuthType.LOGIN_WITH_GOOGLE) {
            return null;
        }
        const currentModel = this.config.getModel();
        const fallbackModel = DEFAULT_GEMINI_FLASH_MODEL;
        // Don't fall back if already using the Flash model
        if (currentModel === fallbackModel) {
            return null;
        }
        // Check if config has a fallback handler (set by the CLI package)
        const fallbackHandler = this.config.flashFallbackHandler;
        if (typeof fallbackHandler === 'function') {
            try {
                const accepted = await fallbackHandler(currentModel, fallbackModel);
                if (accepted) {
                    this.config.setModel(fallbackModel);
                    return fallbackModel;
                }
            }
            catch (error) {
                console.warn('Flash fallback handler failed:', error);
            }
        }
        return null;
    }
    /**
     * Sends a message to the model and returns the response.
     *
     * @remarks
     * This method will wait for the previous message to be processed before
     * sending the next message.
     *
     * @see {@link Chat#sendMessageStream} for the streaming method.
     * @param params - parameters for sending messages within a chat session.
     * @returns The model's response.
     *
     * @example
     * ```ts
     * const chat = ai.chats.create({model: 'gemini-2.0-flash'});
     * const response = await chat.sendMessage({
     *   message: 'Why is the sky blue?'
     * });
     * console.log(response.text);
     * ```
     */
    async sendMessage(params) {
        await this.sendPromise;
        const userContent = createUserContent(params.message);
        const requestContents = this.getHistory(true).concat(userContent);
        this._logApiRequest(requestContents, this.config.getModel());
        const startTime = Date.now();
        let response;
        try {
            const apiCall = () => this.contentGenerator.generateContent({
                model: this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL,
                contents: requestContents,
                config: { ...this.generationConfig, ...params.config },
            });
            response = await retryWithBackoff(apiCall, {
                shouldRetry: (error) => {
                    if (error && error.message) {
                        if (error.message.includes('429'))
                            return true;
                        if (error.message.match(/5\d{2}/))
                            return true;
                    }
                    return false;
                },
                onPersistent429: async (authType) => await this.handleFlashFallback(authType),
                authType: this.config.getContentGeneratorConfig()?.authType,
            });
            const durationMs = Date.now() - startTime;
            await this._logApiResponse(durationMs, response.usageMetadata, getStructuredResponse(response));
            this.sendPromise = (async () => {
                const outputContent = response.candidates?.[0]?.content;
                // Because the AFC input contains the entire curated chat history in
                // addition to the new user input, we need to truncate the AFC history
                // to deduplicate the existing chat history.
                const fullAutomaticFunctionCallingHistory = response.automaticFunctionCallingHistory;
                const index = this.getHistory(true).length;
                let automaticFunctionCallingHistory = [];
                if (fullAutomaticFunctionCallingHistory != null) {
                    automaticFunctionCallingHistory = fullAutomaticFunctionCallingHistory.slice(index) ?? [];
                }
                const modelOutput = outputContent ? [outputContent] : [];
                this.recordHistory(userContent, modelOutput, automaticFunctionCallingHistory);
            })();
            await this.sendPromise.catch(() => {
                // Reset sendPromise to avoid subsequent calls failing
                this.sendPromise = Promise.resolve();
            });
            return response;
        }
        catch (error) {
            const durationMs = Date.now() - startTime;
            this._logApiError(durationMs, error);
            this.sendPromise = Promise.resolve();
            throw error;
        }
    }
    /**
     * Sends a message to the model and returns the response in chunks.
     *
     * @remarks
     * This method will wait for the previous message to be processed before
     * sending the next message.
     *
     * @see {@link Chat#sendMessage} for the non-streaming method.
     * @param params - parameters for sending the message.
     * @return The model's response.
     *
     * @example
     * ```ts
     * const chat = ai.chats.create({model: 'gemini-2.0-flash'});
     * const response = await chat.sendMessageStream({
     *   message: 'Why is the sky blue?'
     * });
     * for await (const chunk of response) {
     *   console.log(chunk.text);
     * }
     * ```
     */
    async sendMessageStream(params) {
        await this.sendPromise;
        const userContent = createUserContent(params.message);
        const requestContents = this.getHistory(true).concat(userContent);
        this._logApiRequest(requestContents, this.config.getModel());
        const startTime = Date.now();
        try {
            const apiCall = () => this.contentGenerator.generateContentStream({
                model: this.config.getModel(),
                contents: requestContents,
                config: { ...this.generationConfig, ...params.config },
            });
            // Note: Retrying streams can be complex. If generateContentStream itself doesn't handle retries
            // for transient issues internally before yielding the async generator, this retry will re-initiate
            // the stream. For simple 429/500 errors on the initial call, this is fine.
            // If errors occur mid-stream, this setup won't resume the stream; it will restart it.
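            // Illustrative note (not from the original source): with the predicate below, an error
            // whose message contains "429" or a 5xx status code (e.g. a message mentioning "500")
            // is retried, while other errors (e.g. 400-style request errors) surface immediately.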
            const streamResponse = await retryWithBackoff(apiCall, {
                shouldRetry: (error) => {
                    // Check error messages for status codes, or specific error names if known
                    if (error && error.message) {
                        if (error.message.includes('429'))
                            return true;
                        if (error.message.match(/5\d{2}/))
                            return true;
                    }
                    return false; // Don't retry other errors by default
                },
                onPersistent429: async (authType) => await this.handleFlashFallback(authType),
                authType: this.config.getContentGeneratorConfig()?.authType,
            });
            // Resolve the internal tracking of send completion promise - `sendPromise`
            // for both success and failure response. The actual failure is still
            // propagated by the `await streamResponse`.
            this.sendPromise = Promise.resolve(streamResponse)
                .then(() => undefined)
                .catch(() => undefined);
            const result = this.processStreamResponse(streamResponse, userContent, startTime);
            return result;
        }
        catch (error) {
            const durationMs = Date.now() - startTime;
            this._logApiError(durationMs, error);
            this.sendPromise = Promise.resolve();
            throw error;
        }
    }
    /**
     * Returns the chat history.
     *
     * @remarks
     * The history is a list of contents alternating between user and model.
     *
     * There are two types of history:
     * - The `curated history` contains only the valid turns between user and
     *   model, which will be included in the subsequent requests sent to the model.
     * - The `comprehensive history` contains all turns, including invalid or
     *   empty model outputs, providing a complete record of the history.
     *
     * The history is updated after receiving the response from the model; for a
     * streaming response, that means after receiving the last chunk of the response.
     *
     * The `comprehensive history` is returned by default. To get the `curated
     * history`, set the `curated` parameter to `true`.
     *
     * @param curated - whether to return the curated history or the comprehensive
     *   history.
     * @return History contents alternating between user and model for the entire
     *   chat session.
     */
    getHistory(curated = false) {
        const history = curated
            ? extractCuratedHistory(this.history)
            : this.history;
        // Deep copy the history to avoid mutating the history outside of the
        // chat session.
        return structuredClone(history);
    }
    /**
     * Clears the chat history.
     */
    clearHistory() {
        this.history = [];
    }
    /**
     * Adds a new entry to the chat history.
     *
     * @param content - The content to add to the history.
     */
    addHistory(content) {
        this.history.push(content);
    }
    setHistory(history) {
        this.history = history;
    }
    getFinalUsageMetadata(chunks) {
        const lastChunkWithMetadata = chunks
            .slice()
            .reverse()
            .find((chunk) => chunk.usageMetadata);
        return lastChunkWithMetadata?.usageMetadata;
    }
    async *processStreamResponse(streamResponse, inputContent, startTime) {
        const outputContent = [];
        const chunks = [];
        let errorOccurred = false;
        try {
            for await (const chunk of streamResponse) {
                if (isValidResponse(chunk)) {
                    chunks.push(chunk);
                    const content = chunk.candidates?.[0]?.content;
                    if (content !== undefined) {
                        if (this.isThoughtContent(content)) {
                            yield chunk;
                            continue;
                        }
                        outputContent.push(content);
                    }
                }
                yield chunk;
            }
        }
        catch (error) {
            errorOccurred = true;
            const durationMs = Date.now() - startTime;
            this._logApiError(durationMs, error);
            throw error;
        }
        if (!errorOccurred) {
            const durationMs = Date.now() - startTime;
            const allParts = [];
            for (const content of outputContent) {
                if (content.parts) {
                    allParts.push(...content.parts);
                }
            }
            const fullText = getStructuredResponseFromParts(allParts);
            await this._logApiResponse(durationMs, this.getFinalUsageMetadata(chunks), fullText);
        }
        this.recordHistory(inputContent, outputContent);
    }
    recordHistory(userInput, modelOutput, automaticFunctionCallingHistory) {
        const nonThoughtModelOutput = modelOutput.filter((content) => !this.isThoughtContent(content));
        let outputContents = [];
        if (nonThoughtModelOutput.length > 0 &&
            nonThoughtModelOutput.every((content) => content.role !== undefined)) {
            outputContents = nonThoughtModelOutput;
        }
        else if (nonThoughtModelOutput.length === 0 && modelOutput.length > 0) {
            // This case handles when the model returns only a thought.
            // We don't want to add an empty model response in this case.
        }
        else {
            // When the user input is not a function response and the model returns an empty
            // response, append an empty model content so that the history always alternates
            // between user and model.
            // Workaround for: https://b.corp.google.com/issues/420354090
            if (!isFunctionResponse(userInput)) {
                outputContents.push({
                    role: 'model',
                    parts: [],
                });
            }
        }
        if (automaticFunctionCallingHistory &&
            automaticFunctionCallingHistory.length > 0) {
            this.history.push(...extractCuratedHistory(automaticFunctionCallingHistory));
        }
        else {
            this.history.push(userInput);
        }
        // Consolidate adjacent model roles in outputContents
        const consolidatedOutputContents = [];
        for (const content of outputContents) {
            if (this.isThoughtContent(content)) {
                continue;
            }
            const lastContent = consolidatedOutputContents[consolidatedOutputContents.length - 1];
            if (this.isTextContent(lastContent) && this.isTextContent(content)) {
                // If both current and last are text, combine their text into the lastContent's first part
                // and append any other parts from the current content.
                lastContent.parts[0].text += content.parts[0].text || '';
                if (content.parts.length > 1) {
                    lastContent.parts.push(...content.parts.slice(1));
                }
            }
            else {
                consolidatedOutputContents.push(content);
            }
        }
        if (consolidatedOutputContents.length > 0) {
            const lastHistoryEntry = this.history[this.history.length - 1];
            const canMergeWithLastHistory = !automaticFunctionCallingHistory ||
                automaticFunctionCallingHistory.length === 0;
            if (canMergeWithLastHistory &&
                this.isTextContent(lastHistoryEntry) &&
                this.isTextContent(consolidatedOutputContents[0])) {
                // If both current and last are text, combine their text into the lastHistoryEntry's first part
                // and append any other parts from the current content.
                lastHistoryEntry.parts[0].text += consolidatedOutputContents[0].parts[0].text || '';
                if (consolidatedOutputContents[0].parts.length > 1) {
                    lastHistoryEntry.parts.push(...consolidatedOutputContents[0].parts.slice(1));
                }
                consolidatedOutputContents.shift(); // Remove the first element as it's merged
            }
            this.history.push(...consolidatedOutputContents);
        }
    }
    isTextContent(content) {
        return !!(content &&
            content.role === 'model' &&
            content.parts &&
            content.parts.length > 0 &&
            typeof content.parts[0].text === 'string' &&
            content.parts[0].text !== '');
    }
    isThoughtContent(content) {
        return !!(content &&
            content.role === 'model' &&
            content.parts &&
            content.parts.length > 0 &&
            typeof content.parts[0].thought === 'boolean' &&
            content.parts[0].thought === true);
    }
}
//# sourceMappingURL=geminiChat.js.map
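/*
 * Usage sketch (not part of the original file): a minimal example of driving GeminiChat
 * directly, assuming a `config` object and `contentGenerator` that satisfy the interfaces
 * used above (getModel, setModel, getContentGeneratorConfig, generateContent,
 * generateContentStream). How the CLI actually constructs these objects is not shown here.
 *
 *   const chat = new GeminiChat(config, contentGenerator);
 *
 *   // Non-streaming: resolves with the full GenerateContentResponse.
 *   const response = await chat.sendMessage({ message: 'Why is the sky blue?' });
 *
 *   // Streaming: sendMessageStream resolves to an async generator of response chunks.
 *   for await (const chunk of await chat.sendMessageStream({ message: 'Tell me more.' })) {
 *     console.log(chunk.candidates?.[0]?.content);
 *   }
 *
 *   // Comprehensive vs. curated history:
 *   const fullHistory = chat.getHistory();
 *   const curatedHistory = chat.getHistory(true);
 */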