@quantumai/quantum-cli-core
Quantum CLI Core - Multi-LLM Collaboration System
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
// DISCLAIMER: This is a copied version of https://github.com/googleapis/js-genai/blob/main/src/chats.ts with the intention of working around a key bug
// where function responses are not treated as "valid" responses: https://b.corp.google.com/issues/420354090
import { createUserContent, } from '@google/genai';
import { retryWithBackoff } from '../utils/retry.js';
import { isFunctionResponse } from '../utils/messageInspectors.js';
import { AuthType } from './contentGenerator.js';
import { logApiRequest, logApiResponse, logApiError, } from '../telemetry/loggers.js';
import { getStructuredResponse, getStructuredResponseFromParts, } from '../utils/generateContentResponseUtilities.js';
import { ApiErrorEvent, ApiRequestEvent, ApiResponseEvent, } from '../telemetry/types.js';
import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
/**
* Returns true if the response is valid, false otherwise.
*/
function isValidResponse(response) {
if (response.candidates === undefined || response.candidates.length === 0) {
return false;
}
const content = response.candidates[0]?.content;
if (content === undefined) {
return false;
}
return isValidContent(content);
}
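/**
* Returns true if the content has at least one part and none of its parts is
* empty. An empty text part is tolerated only when it is marked as a thought.
*/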
function isValidContent(content) {
if (content.parts === undefined || content.parts.length === 0) {
return false;
}
for (const part of content.parts) {
if (part === undefined || Object.keys(part).length === 0) {
return false;
}
if (!part.thought && part.text !== undefined && part.text === '') {
return false;
}
}
return true;
}
/**
* Validates that the history contains only the correct roles.
*
* @throws Error if the history contains an invalid role.
*/
function validateHistory(history) {
for (const content of history) {
if (content.role !== 'user' && content.role !== 'model') {
throw new Error(`Role must be user or model, but got ${content.role}.`);
}
}
}
/**
* Extracts the curated (valid) history from a comprehensive history.
*
* @remarks
* The model may sometimes generate invalid or empty contents (e.g., due to safety
* filters or recitation). Extracting the valid turns from the history
* ensures that subsequent requests can be accepted by the model.
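*
* @example
* ```ts
* // Illustrative sketch, not part of the original source: an invalid model
* // turn causes both it and the preceding user turn to be dropped.
* const history = [
*   { role: 'user', parts: [{ text: 'Hi' }] },
*   { role: 'model', parts: [] }, // invalid: empty parts
*   { role: 'user', parts: [{ text: 'Hello?' }] },
*   { role: 'model', parts: [{ text: 'Hello!' }] },
* ];
* extractCuratedHistory(history);
* // => keeps only the second user turn and its valid model reply
* ```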
*/
function extractCuratedHistory(comprehensiveHistory) {
if (comprehensiveHistory === undefined || comprehensiveHistory.length === 0) {
return [];
}
const curatedHistory = [];
const length = comprehensiveHistory.length;
let i = 0;
while (i < length) {
if (comprehensiveHistory[i].role === 'user') {
curatedHistory.push(comprehensiveHistory[i]);
i++;
}
else {
const modelOutput = [];
let isValid = true;
while (i < length && comprehensiveHistory[i].role === 'model') {
modelOutput.push(comprehensiveHistory[i]);
if (isValid && !isValidContent(comprehensiveHistory[i])) {
isValid = false;
}
i++;
}
if (isValid) {
curatedHistory.push(...modelOutput);
}
else {
// Remove the last user input when model content is invalid.
curatedHistory.pop();
}
}
}
return curatedHistory;
}
/**
* Chat session that enables sending messages to the model with previous
* conversation context.
*
* @remarks
* The session maintains all the turns between user and model.
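*
* @example
* ```ts
* // Illustrative sketch, not part of the original source: `config` and
* // `contentGenerator` are assumed to be instances supplied by the caller.
* const chat = new GeminiChat(config, contentGenerator);
* const response = await chat.sendMessage({ message: 'Why is the sky blue?' });
* ```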
*/
export class GeminiChat {
config;
contentGenerator;
generationConfig;
history;
// A promise to represent the current state of the message being sent to the
// model.
sendPromise = Promise.resolve();
constructor(config, contentGenerator, generationConfig = {}, history = []) {
this.config = config;
this.contentGenerator = contentGenerator;
this.generationConfig = generationConfig;
this.history = history;
validateHistory(history);
}
_getRequestTextFromContents(contents) {
return contents
.flatMap((content) => content.parts ?? [])
.map((part) => part.text)
.filter(Boolean)
.join('');
}
async _logApiRequest(contents, model) {
const requestText = this._getRequestTextFromContents(contents);
logApiRequest(this.config, new ApiRequestEvent(model, requestText));
}
async _logApiResponse(durationMs, usageMetadata, responseText) {
logApiResponse(this.config, new ApiResponseEvent(this.config.getModel(), durationMs, usageMetadata, responseText));
}
_logApiError(durationMs, error) {
const errorMessage = error instanceof Error ? error.message : String(error);
const errorType = error instanceof Error ? error.name : 'unknown';
logApiError(this.config, new ApiErrorEvent(this.config.getModel(), errorMessage, durationMs, errorType));
}
/**
* Handles fallback to Flash model when persistent 429 errors occur for OAuth users.
* Uses a fallback handler if provided by the config, otherwise returns null.
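*
* @example
* ```ts
* // Illustrative sketch, not part of the original source. The handler shape,
* // (currentModel, fallbackModel) => Promise<boolean>, is inferred from how it
* // is invoked below; the CLI package is expected to install it on the config.
* config.flashFallbackHandler = async (currentModel, fallbackModel) => {
*   console.warn(`Falling back from ${currentModel} to ${fallbackModel}`);
*   return true; // accept the fallback
* };
* ```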
*/
async handleFlashFallback(authType) {
// Only handle fallback for OAuth users
if (authType !== AuthType.LOGIN_WITH_GOOGLE) {
return null;
}
const currentModel = this.config.getModel();
const fallbackModel = DEFAULT_GEMINI_FLASH_MODEL;
// Don't fallback if already using Flash model
if (currentModel === fallbackModel) {
return null;
}
// Check if config has a fallback handler (set by CLI package)
const fallbackHandler = this.config.flashFallbackHandler;
if (typeof fallbackHandler === 'function') {
try {
const accepted = await fallbackHandler(currentModel, fallbackModel);
if (accepted) {
this.config.setModel(fallbackModel);
return fallbackModel;
}
}
catch (error) {
console.warn('Flash fallback handler failed:', error);
}
}
return null;
}
/**
* Sends a message to the model and returns the response.
*
* @remarks
* This method will wait for the previous message to be processed before
* sending the next message.
*
* @see {@link GeminiChat#sendMessageStream} for the streaming method.
* @param params - parameters for sending messages within a chat session.
* @returns The model's response.
*
* @example
* ```ts
* const chat = ai.chats.create({model: 'gemini-2.0-flash'});
* const response = await chat.sendMessage({
* message: 'Why is the sky blue?'
* });
* console.log(response.text);
* ```
*/
async sendMessage(params) {
await this.sendPromise;
const userContent = createUserContent(params.message);
const requestContents = this.getHistory(true).concat(userContent);
this._logApiRequest(requestContents, this.config.getModel());
const startTime = Date.now();
let response;
try {
const apiCall = () => this.contentGenerator.generateContent({
model: this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL,
contents: requestContents,
config: { ...this.generationConfig, ...params.config },
});
response = await retryWithBackoff(apiCall, {
shouldRetry: (error) => {
if (error && error.message) {
if (error.message.includes('429'))
return true;
if (error.message.match(/5\d{2}/))
return true;
}
return false;
},
onPersistent429: async (authType) => await this.handleFlashFallback(authType),
authType: this.config.getContentGeneratorConfig()?.authType,
});
const durationMs = Date.now() - startTime;
await this._logApiResponse(durationMs, response.usageMetadata, getStructuredResponse(response));
this.sendPromise = (async () => {
const outputContent = response.candidates?.[0]?.content;
// Because the AFC input contains the entire curated chat history in
// addition to the new user input, we need to truncate the AFC history
// to deduplicate the existing chat history.
const fullAutomaticFunctionCallingHistory = response.automaticFunctionCallingHistory;
const index = this.getHistory(true).length;
let automaticFunctionCallingHistory = [];
if (fullAutomaticFunctionCallingHistory != null) {
automaticFunctionCallingHistory =
fullAutomaticFunctionCallingHistory.slice(index) ?? [];
}
const modelOutput = outputContent ? [outputContent] : [];
this.recordHistory(userContent, modelOutput, automaticFunctionCallingHistory);
})();
await this.sendPromise.catch(() => {
// Resets sendPromise to avoid subsequent calls failing
this.sendPromise = Promise.resolve();
});
return response;
}
catch (error) {
const durationMs = Date.now() - startTime;
this._logApiError(durationMs, error);
this.sendPromise = Promise.resolve();
throw error;
}
}
/**
* Sends a message to the model and returns the response in chunks.
*
* @remarks
* This method will wait for the previous message to be processed before
* sending the next message.
*
* @see {@link GeminiChat#sendMessage} for the non-streaming method.
* @param params - parameters for sending the message.
* @return An async generator yielding the model's response in chunks.
*
* @example
* ```ts
* const chat = ai.chats.create({model: 'gemini-2.0-flash'});
* const response = await chat.sendMessageStream({
* message: 'Why is the sky blue?'
* });
* for await (const chunk of response) {
* console.log(chunk.text);
* }
* ```
*/
async sendMessageStream(params) {
await this.sendPromise;
const userContent = createUserContent(params.message);
const requestContents = this.getHistory(true).concat(userContent);
this._logApiRequest(requestContents, this.config.getModel());
const startTime = Date.now();
try {
const apiCall = () => this.contentGenerator.generateContentStream({
model: this.config.getModel(),
contents: requestContents,
config: { ...this.generationConfig, ...params.config },
});
// Note: Retrying streams can be complex. If generateContentStream itself doesn't handle retries
// for transient issues internally before yielding the async generator, this retry will re-initiate
// the stream. For simple 429/500 errors on initial call, this is fine.
// If errors occur mid-stream, this setup won't resume the stream; it will restart it.
const streamResponse = await retryWithBackoff(apiCall, {
shouldRetry: (error) => {
// Check error messages for status codes, or specific error names if known
if (error && error.message) {
if (error.message.includes('429'))
return true;
if (error.message.match(/5\d{2}/))
return true;
}
return false; // Don't retry other errors by default
},
onPersistent429: async (authType) => await this.handleFlashFallback(authType),
authType: this.config.getContentGeneratorConfig()?.authType,
});
// Resolve the internal send-completion promise (`sendPromise`) for both
// success and failure responses. Any actual failure is still propagated by
// the `await streamResponse` above.
this.sendPromise = Promise.resolve(streamResponse)
.then(() => undefined)
.catch(() => undefined);
const result = this.processStreamResponse(streamResponse, userContent, startTime);
return result;
}
catch (error) {
const durationMs = Date.now() - startTime;
this._logApiError(durationMs, error);
this.sendPromise = Promise.resolve();
throw error;
}
}
/**
* Returns the chat history.
*
* @remarks
* The history is a list of contents alternating between user and model.
*
* There are two types of history:
* - The `curated history` contains only the valid turns between user and
* model, which will be included in the subsequent requests sent to the model.
* - The `comprehensive history` contains all turns, including invalid or
* empty model outputs, providing a complete record of the history.
*
* The history is updated after the response is received from the model;
* for a streaming response, this means after the last chunk has been received.
*
* The `comprehensive history` is returned by default. To get the `curated
* history`, set the `curated` parameter to `true`.
*
* @param curated - whether to return the curated history or the comprehensive
* history.
* @return History contents alternating between user and model for the entire
* chat session.
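*
* @example
* ```ts
* // Illustrative sketch, not part of the original source.
* const fullHistory = chat.getHistory();        // includes invalid/empty model turns
* const curatedHistory = chat.getHistory(true); // only the turns sent to the model
* ```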
*/
getHistory(curated = false) {
const history = curated
? extractCuratedHistory(this.history)
: this.history;
// Deep copy the history to avoid mutating the history outside of the
// chat session.
return structuredClone(history);
}
/**
* Clears the chat history.
*/
clearHistory() {
this.history = [];
}
/**
* Adds a new entry to the chat history.
*
* @param content - The content to add to the history.
*/
addHistory(content) {
this.history.push(content);
}
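/**
* Replaces the entire chat history.
*
* @param history - The new history contents.
*/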
setHistory(history) {
this.history = history;
}
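/**
* Returns the usage metadata from the last chunk that carries it, if any.
*/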
getFinalUsageMetadata(chunks) {
const lastChunkWithMetadata = chunks
.slice()
.reverse()
.find((chunk) => chunk.usageMetadata);
return lastChunkWithMetadata?.usageMetadata;
}
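/**
* Consumes the streaming response, yielding each chunk to the caller while
* accumulating valid, non-thought model output, then logs the response and
* records the turn into the chat history.
*/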
async *processStreamResponse(streamResponse, inputContent, startTime) {
const outputContent = [];
const chunks = [];
let errorOccurred = false;
try {
for await (const chunk of streamResponse) {
if (isValidResponse(chunk)) {
chunks.push(chunk);
const content = chunk.candidates?.[0]?.content;
if (content !== undefined) {
if (this.isThoughtContent(content)) {
yield chunk;
continue;
}
outputContent.push(content);
}
}
yield chunk;
}
}
catch (error) {
errorOccurred = true;
const durationMs = Date.now() - startTime;
this._logApiError(durationMs, error);
throw error;
}
if (!errorOccurred) {
const durationMs = Date.now() - startTime;
const allParts = [];
for (const content of outputContent) {
if (content.parts) {
allParts.push(...content.parts);
}
}
const fullText = getStructuredResponseFromParts(allParts);
await this._logApiResponse(durationMs, this.getFinalUsageMetadata(chunks), fullText);
}
this.recordHistory(inputContent, outputContent);
}
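/**
* Records a user turn and the corresponding model output into the history,
* consolidating adjacent text contents. When automatic function calling
* history is present, its curated turns are recorded in place of the raw
* user input.
*/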
recordHistory(userInput, modelOutput, automaticFunctionCallingHistory) {
const nonThoughtModelOutput = modelOutput.filter((content) => !this.isThoughtContent(content));
let outputContents = [];
if (nonThoughtModelOutput.length > 0 &&
nonThoughtModelOutput.every((content) => content.role !== undefined)) {
outputContents = nonThoughtModelOutput;
}
else if (nonThoughtModelOutput.length === 0 && modelOutput.length > 0) {
// This case handles when the model returns only a thought.
// We don't want to add an empty model response in this case.
}
else {
// When the user input is not a function response, append an empty model content
// when the model returns an empty response, so that the history always
// alternates between user and model turns.
// Workaround for: https://b.corp.google.com/issues/420354090
if (!isFunctionResponse(userInput)) {
outputContents.push({
role: 'model',
parts: [],
});
}
}
if (automaticFunctionCallingHistory &&
automaticFunctionCallingHistory.length > 0) {
this.history.push(...extractCuratedHistory(automaticFunctionCallingHistory));
}
else {
this.history.push(userInput);
}
// Consolidate adjacent model roles in outputContents
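// Illustrative note, not part of the original source: two adjacent text contents
// such as { role: 'model', parts: [{ text: 'Hel' }] } and
// { role: 'model', parts: [{ text: 'lo' }] } collapse into a single
// { role: 'model', parts: [{ text: 'Hello' }] } entry.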
const consolidatedOutputContents = [];
for (const content of outputContents) {
if (this.isThoughtContent(content)) {
continue;
}
const lastContent = consolidatedOutputContents[consolidatedOutputContents.length - 1];
if (this.isTextContent(lastContent) && this.isTextContent(content)) {
// If both current and last are text, combine their text into the lastContent's first part
// and append any other parts from the current content.
lastContent.parts[0].text += content.parts[0].text || '';
if (content.parts.length > 1) {
lastContent.parts.push(...content.parts.slice(1));
}
}
else {
consolidatedOutputContents.push(content);
}
}
if (consolidatedOutputContents.length > 0) {
const lastHistoryEntry = this.history[this.history.length - 1];
const canMergeWithLastHistory = !automaticFunctionCallingHistory ||
automaticFunctionCallingHistory.length === 0;
if (canMergeWithLastHistory &&
this.isTextContent(lastHistoryEntry) &&
this.isTextContent(consolidatedOutputContents[0])) {
// If both current and last are text, combine their text into the lastHistoryEntry's first part
// and append any other parts from the current content.
lastHistoryEntry.parts[0].text +=
consolidatedOutputContents[0].parts[0].text || '';
if (consolidatedOutputContents[0].parts.length > 1) {
lastHistoryEntry.parts.push(...consolidatedOutputContents[0].parts.slice(1));
}
consolidatedOutputContents.shift(); // Remove the first element as it's merged
}
this.history.push(...consolidatedOutputContents);
}
}
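/**
* Returns true if the content is a model turn whose first part contains
* non-empty text.
*/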
isTextContent(content) {
return !!(content &&
content.role === 'model' &&
content.parts &&
content.parts.length > 0 &&
typeof content.parts[0].text === 'string' &&
content.parts[0].text !== '');
}
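/**
* Returns true if the content is a model turn whose first part is marked as a
* thought.
*/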
isThoughtContent(content) {
return !!(content &&
content.role === 'model' &&
content.parts &&
content.parts.length > 0 &&
typeof content.parts[0].thought === 'boolean' &&
content.parts[0].thought === true);
}
}
//# sourceMappingURL=geminiChat.js.map