buroventures-harald-code-core

Harald Code Core - Core functionality for AI-powered coding assistant

/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
import { getFolderStructure } from '../utils/getFolderStructure.js';
import { Turn, GeminiEventType } from './turn.js';
import { getCoreSystemPrompt, getCompressionPrompt } from './prompts.js';
import { getFunctionCalls } from '../utils/generateContentResponseUtilities.js';
import { checkNextSpeaker } from '../utils/nextSpeakerChecker.js';
import { reportError } from '../utils/errorReporting.js';
import { GeminiChat } from './geminiChat.js';
import { retryWithBackoff } from '../utils/retry.js';
import { getErrorMessage } from '../utils/errors.js';
import { isFunctionResponse } from '../utils/messageInspectors.js';
import { tokenLimit } from './tokenLimits.js';
import { AuthType, createContentGenerator } from './contentGenerator.js';
import { ProxyAgent, setGlobalDispatcher } from 'undici';
import { DEFAULT_GEMINI_FLASH_MODEL } from '../config/models.js';
import { LoopDetectionService } from '../services/loopDetectionService.js';
import { ideContext } from '../ide/ideContext.js';
import { logNextSpeakerCheck } from '../telemetry/loggers.js';
import { NextSpeakerCheckEvent } from '../telemetry/types.js';

function isThinkingSupported(model) {
    if (model.startsWith('gemini-2.5'))
        return true;
    return false;
}

/**
 * Returns the index of the content after the fraction of the total characters in the history.
 *
 * Exported for testing purposes.
 */
export function findIndexAfterFraction(history, fraction) {
    if (fraction <= 0 || fraction >= 1) {
        throw new Error('Fraction must be between 0 and 1');
    }
    const contentLengths = history.map((content) => JSON.stringify(content).length);
    const totalCharacters = contentLengths.reduce((sum, length) => sum + length, 0);
    const targetCharacters = totalCharacters * fraction;
    let charactersSoFar = 0;
    for (let i = 0; i < contentLengths.length; i++) {
        charactersSoFar += contentLengths[i];
        if (charactersSoFar >= targetCharacters) {
            return i;
        }
    }
    return contentLengths.length;
}

export class GeminiClient {
    config;
    chat;
    contentGenerator;
    embeddingModel;
    generateContentConfig = {
        temperature: 0,
        topP: 1,
    };
    sessionTurnCount = 0;
    MAX_TURNS = 100;
    /**
     * Threshold for compression token count as a fraction of the model's token limit.
     * If the chat history exceeds this threshold, it will be compressed.
     */
    COMPRESSION_TOKEN_THRESHOLD = 0.7;
    /**
     * The fraction of the latest chat history to keep. A value of 0.3
     * means that only the last 30% of the chat history will be kept after compression.
     */
    COMPRESSION_PRESERVE_THRESHOLD = 0.3;
    loopDetector;
    lastPromptId;
    constructor(config) {
        this.config = config;
        if (config.getProxy()) {
            setGlobalDispatcher(new ProxyAgent(config.getProxy()));
        }
        this.embeddingModel = config.getEmbeddingModel();
        this.loopDetector = new LoopDetectionService(config);
        this.lastPromptId = this.config.getSessionId();
    }
    async initialize(contentGeneratorConfig, settings) {
        this.contentGenerator = await createContentGenerator(contentGeneratorConfig, this.config, this.config.getSessionId(), settings);
        this.chat = await this.startChat();
    }
    getContentGenerator() {
        if (!this.contentGenerator) {
            throw new Error('Content generator not initialized');
        }
        return this.contentGenerator;
    }
    getUserTier() {
        return this.contentGenerator?.userTier;
    }
    async addHistory(content) {
        this.getChat().addHistory(content);
    }
    getChat() {
        if (!this.chat) {
            throw new Error('Chat not initialized');
        }
        return this.chat;
    }
    isInitialized() {
        return this.chat !== undefined && this.contentGenerator !== undefined;
    }
    getHistory() {
        return this.getChat().getHistory();
    }
    setHistory(history) {
        this.getChat().setHistory(history);
    }
    async setTools() {
        const toolRegistry = await this.config.getToolRegistry();
        const toolDeclarations = toolRegistry.getFunctionDeclarations();
        const tools = [{ functionDeclarations: toolDeclarations }];
        this.getChat().setTools(tools);
    }
    async resetChat() {
        this.chat = await this.startChat();
    }
    async addDirectoryContext() {
        if (!this.chat) {
            return;
        }
        this.getChat().addHistory({
            role: 'user',
            parts: [{ text: await this.getDirectoryContext() }],
        });
    }
    async getDirectoryContext() {
        const workspaceContext = this.config.getWorkspaceContext();
        const workspaceDirectories = workspaceContext.getDirectories();
        const folderStructures = await Promise.all(workspaceDirectories.map((dir) => getFolderStructure(dir, {
            fileService: this.config.getFileService(),
        })));
        const folderStructure = folderStructures.join('\n');
        const dirList = workspaceDirectories.map((dir) => ` - ${dir}`).join('\n');
        const workingDirPreamble = `I'm currently working in the following directories:\n${dirList}\n Folder structures are as follows:\n${folderStructure}`;
        return workingDirPreamble;
    }
    async getEnvironment() {
        const today = new Date().toLocaleDateString(undefined, {
            weekday: 'long',
            year: 'numeric',
            month: 'long',
            day: 'numeric',
        });
        const platform = process.platform;
        const workspaceContext = this.config.getWorkspaceContext();
        const workspaceDirectories = workspaceContext.getDirectories();
        const folderStructures = await Promise.all(workspaceDirectories.map((dir) => getFolderStructure(dir, {
            fileService: this.config.getFileService(),
        })));
        const folderStructure = folderStructures.join('\n');
        let workingDirPreamble;
        if (workspaceDirectories.length === 1) {
            workingDirPreamble = `I'm currently working in the directory: ${workspaceDirectories[0]}`;
        }
        else {
            const dirList = workspaceDirectories
                .map((dir) => ` - ${dir}`)
                .join('\n');
            workingDirPreamble = `I'm currently working in the following directories:\n${dirList}`;
        }
        const context = `
  This is the Qwen Code. We are setting up the context for our chat.
  Today's date is ${today}.
  My operating system is: ${platform}
  ${workingDirPreamble}
  Here is the folder structure of the current working directories:\n
  ${folderStructure}
        `.trim();
        const initialParts = [{ text: context }];
        const toolRegistry = await this.config.getToolRegistry();
        // Add full file context if the flag is set
        if (this.config.getFullContext()) {
            try {
                const readManyFilesTool = toolRegistry.getTool('read_many_files');
                if (readManyFilesTool) {
                    // Read all files in the target directory
                    const result = await readManyFilesTool.execute({
                        paths: ['**/*'], // Read everything recursively
                        useDefaultExcludes: true, // Use default excludes
                    }, AbortSignal.timeout(30000));
                    if (result.llmContent) {
                        initialParts.push({
                            text: `\n--- Full File Context ---\n${result.llmContent}`,
                        });
                    }
                    else {
                        console.warn('Full context requested, but read_many_files returned no content.');
                    }
                }
                else {
                    console.warn('Full context requested, but read_many_files tool not found.');
                }
            }
            catch (error) {
                // Not using reportError here as it's a startup/config phase, not a chat/generation phase error.
                console.error('Error reading full file context:', error);
                initialParts.push({
                    text: '\n--- Error reading full file context ---',
                });
            }
        }
        return initialParts;
    }
    async startChat(extraHistory) {
        const envParts = await this.getEnvironment();
        const toolRegistry = await this.config.getToolRegistry();
        const toolDeclarations = toolRegistry.getFunctionDeclarations();
        const tools = [{ functionDeclarations: toolDeclarations }];
        const history = [
            {
                role: 'user',
                parts: envParts,
            },
            {
                role: 'model',
                parts: [{ text: 'Got it. Thanks for the context!' }],
            },
            ...(extraHistory ?? []),
        ];
        try {
            const userMemory = this.config.getUserMemory();
            const systemInstruction = getCoreSystemPrompt(userMemory);
            const generateContentConfigWithThinking = isThinkingSupported(this.config.getModel())
                ? {
                    ...this.generateContentConfig,
                    thinkingConfig: {
                        includeThoughts: true,
                    },
                }
                : this.generateContentConfig;
            return new GeminiChat(this.config, this.getContentGenerator(), {
                systemInstruction,
                ...generateContentConfigWithThinking,
                tools,
            }, history);
        }
        catch (error) {
            await reportError(error, 'Error initializing Gemini chat session.', history, 'startChat');
            throw new Error(`Failed to initialize chat: ${getErrorMessage(error)}`);
        }
    }
    async *sendMessageStream(request, signal, prompt_id, turns = this.MAX_TURNS, originalModel) {
        if (this.lastPromptId !== prompt_id) {
            this.loopDetector.reset(prompt_id);
            this.lastPromptId = prompt_id;
        }
        this.sessionTurnCount++;
        if (this.config.getMaxSessionTurns() > 0 &&
            this.sessionTurnCount > this.config.getMaxSessionTurns()) {
            yield { type: GeminiEventType.MaxSessionTurns };
            return new Turn(this.getChat(), prompt_id);
        }
        // Ensure turns never exceeds MAX_TURNS to prevent infinite loops
        const boundedTurns = Math.min(turns, this.MAX_TURNS);
        if (!boundedTurns) {
            return new Turn(this.getChat(), prompt_id);
        }
        // Track the original model from the first call to detect model switching
        const initialModel = originalModel || this.config.getModel();
        const compressed = await this.tryCompressChat(prompt_id);
        if (compressed) {
            yield { type: GeminiEventType.ChatCompressed, value: compressed };
        }
        // Check session token limit after compression using accurate token counting
        const sessionTokenLimit = this.config.getSessionTokenLimit();
        if (sessionTokenLimit > 0) {
            // Get all the content that would be sent in an API call
            const currentHistory = this.getChat().getHistory(true);
            const userMemory = this.config.getUserMemory();
            const systemPrompt = getCoreSystemPrompt(userMemory);
            const environment = await this.getEnvironment();
            // Create a mock request content to count total tokens
            const mockRequestContent = [
                {
                    role: 'system',
                    parts: [{ text: systemPrompt }, ...environment],
                },
                ...currentHistory,
            ];
            // Use the improved countTokens method for accurate counting
            const { totalTokens: totalRequestTokens } = await this.getContentGenerator().countTokens({
                model: this.config.getModel(),
                contents: mockRequestContent,
            });
            if (totalRequestTokens !== undefined &&
                totalRequestTokens > sessionTokenLimit) {
                yield {
                    type: GeminiEventType.SessionTokenLimitExceeded,
                    value: {
                        currentTokens: totalRequestTokens,
                        limit: sessionTokenLimit,
                        message: `Session token limit exceeded: ${totalRequestTokens} tokens > ${sessionTokenLimit} limit. ` +
                            'Please start a new session or increase the sessionTokenLimit in your settings.json.',
                    },
                };
                return new Turn(this.getChat(), prompt_id);
            }
        }
        if (this.config.getIdeModeFeature() && this.config.getIdeMode()) {
            const ideContextState = ideContext.getIdeContext();
            const openFiles = ideContextState?.workspaceState?.openFiles;
            if (openFiles && openFiles.length > 0) {
                const contextParts = [];
                const firstFile = openFiles[0];
                const activeFile = firstFile.isActive ? firstFile : undefined;
                if (activeFile) {
                    contextParts.push(`This is the file that the user is looking at:\n- Path: ${activeFile.path}`);
                    if (activeFile.cursor) {
                        contextParts.push(`This is the cursor position in the file:\n- Cursor Position: Line ${activeFile.cursor.line}, Character ${activeFile.cursor.character}`);
                    }
                    if (activeFile.selectedText) {
                        contextParts.push(`This is the selected text in the file:\n- ${activeFile.selectedText}`);
                    }
                }
                const otherOpenFiles = activeFile ? openFiles.slice(1) : openFiles;
                if (otherOpenFiles.length > 0) {
                    const recentFiles = otherOpenFiles
                        .map((file) => `- ${file.path}`)
                        .join('\n');
                    const heading = activeFile
                        ? `Here are some other files the user has open, with the most recent at the top:`
                        : `Here are some files the user has open, with the most recent at the top:`;
                    contextParts.push(`${heading}\n${recentFiles}`);
                }
                if (contextParts.length > 0) {
                    request = [
                        { text: contextParts.join('\n') },
                        ...(Array.isArray(request) ? request : [request]),
                    ];
                }
            }
        }
        const turn = new Turn(this.getChat(), prompt_id);
        const loopDetected = await this.loopDetector.turnStarted(signal);
        if (loopDetected) {
            yield { type: GeminiEventType.LoopDetected };
            return turn;
        }
        const resultStream = turn.run(request, signal);
        for await (const event of resultStream) {
            if (this.loopDetector.addAndCheck(event)) {
                yield { type: GeminiEventType.LoopDetected };
                return turn;
            }
            yield event;
        }
        if (!turn.pendingToolCalls.length && signal && !signal.aborted) {
            // Check if model was switched during the call (likely due to quota error)
            const currentModel = this.config.getModel();
            if (currentModel !== initialModel) {
                // Model was switched (likely due to quota error fallback)
                // Don't continue with recursive call to prevent unwanted Flash execution
                return turn;
            }
            const nextSpeakerCheck = await checkNextSpeaker(this.getChat(), this, signal);
            logNextSpeakerCheck(this.config, new NextSpeakerCheckEvent(prompt_id, turn.finishReason?.toString() || '', nextSpeakerCheck?.next_speaker || ''));
            if (nextSpeakerCheck?.next_speaker === 'model') {
                const nextRequest = [{ text: 'Please continue.' }];
                // This recursive call's events will be yielded out, but the final
                // turn object will be from the top-level call.
                yield* this.sendMessageStream(nextRequest, signal, prompt_id, boundedTurns - 1, initialModel);
            }
        }
        return turn;
    }
    async generateJson(contents, schema, abortSignal, model, config = {}) {
        // Use current model from config instead of hardcoded Flash model
        const modelToUse = model || this.config.getModel() || DEFAULT_GEMINI_FLASH_MODEL;
        try {
            const userMemory = this.config.getUserMemory();
            const systemPromptMappings = this.config.getSystemPromptMappings();
            const systemInstruction = getCoreSystemPrompt(userMemory, {
                systemPromptMappings,
            });
            const requestConfig = {
                abortSignal,
                ...this.generateContentConfig,
                ...config,
            };
            // Convert schema to function declaration
            const functionDeclaration = {
                name: 'respond_in_schema',
                description: 'Provide the response in provided schema',
                parameters: schema,
            };
            const tools = [
                {
                    functionDeclarations: [functionDeclaration],
                },
            ];
            const apiCall = () => this.getContentGenerator().generateContent({
                model: modelToUse,
                config: {
                    ...requestConfig,
                    systemInstruction,
                    tools,
                },
                contents,
            }, this.lastPromptId);
            // Check if the content generator has its own retry logic (e.g., API key rotation)
            const hasInternalRetryLogic = this.getContentGenerator().rotationManager !== undefined;
            let result;
            if (hasInternalRetryLogic) {
                // Content generator handles retries internally (e.g., with API key rotation)
                result = await apiCall();
            }
            else {
                // Use external retry logic for content generators without internal retry handling
                result = await retryWithBackoff(apiCall, {
                    onPersistent429: async (authType, error) => await this.handleFlashFallback(authType, error),
                    authType: this.config.getContentGeneratorConfig()?.authType,
                });
            }
            const functionCalls = getFunctionCalls(result);
            if (functionCalls && functionCalls.length > 0) {
                const functionCall = functionCalls.find((call) => call.name === 'respond_in_schema');
                if (functionCall && functionCall.args) {
                    return functionCall.args;
                }
            }
            return {};
        }
        catch (error) {
            if (abortSignal.aborted) {
                throw error;
            }
            // Avoid double reporting for the empty response case handled above
            if (error instanceof Error &&
                error.message === 'API returned an empty response for generateJson.') {
                throw error;
            }
            await reportError(error, 'Error generating JSON content via API.', contents, 'generateJson-api');
            throw new Error(`Failed to generate JSON content: ${getErrorMessage(error)}`);
        }
    }
    async generateContent(contents, generationConfig, abortSignal, model) {
        const modelToUse = model ?? this.config.getModel();
        const configToUse = {
            ...this.generateContentConfig,
            ...generationConfig,
        };
        try {
            const userMemory = this.config.getUserMemory();
            const systemPromptMappings = this.config.getSystemPromptMappings();
            const systemInstruction = getCoreSystemPrompt(userMemory, {
                systemPromptMappings,
            });
            const requestConfig = {
                abortSignal,
                ...configToUse,
                systemInstruction,
            };
            const apiCall = () => this.getContentGenerator().generateContent({
                model: modelToUse,
                config: requestConfig,
                contents,
            }, this.lastPromptId);
            // Check if the content generator has its own retry logic (e.g., API key rotation)
            const hasInternalRetryLogic = this.getContentGenerator().rotationManager !== undefined;
            let result;
            if (hasInternalRetryLogic) {
                // Content generator handles retries internally (e.g., with API key rotation)
                result = await apiCall();
            }
            else {
                // Use external retry logic for content generators without internal retry handling
                result = await retryWithBackoff(apiCall, {
                    onPersistent429: async (authType, error) => await this.handleFlashFallback(authType, error),
                    authType: this.config.getContentGeneratorConfig()?.authType,
                });
            }
            return result;
        }
        catch (error) {
            if (abortSignal.aborted) {
                throw error;
            }
            await reportError(error, `Error generating content via API with model ${modelToUse}.`, {
                requestContents: contents,
                requestConfig: configToUse,
            }, 'generateContent-api');
            throw new Error(`Failed to generate content with model ${modelToUse}: ${getErrorMessage(error)}`);
        }
    }
    async generateEmbedding(texts) {
        if (!texts || texts.length === 0) {
            return [];
        }
        const embedModelParams = {
            model: this.embeddingModel,
            contents: texts,
        };
        const embedContentResponse = await this.getContentGenerator().embedContent(embedModelParams);
        if (!embedContentResponse.embeddings ||
            embedContentResponse.embeddings.length === 0) {
            throw new Error('No embeddings found in API response.');
        }
        if (embedContentResponse.embeddings.length !== texts.length) {
            throw new Error(`API returned a mismatched number of embeddings. Expected ${texts.length}, got ${embedContentResponse.embeddings.length}.`);
        }
        return embedContentResponse.embeddings.map((embedding, index) => {
            const values = embedding.values;
            if (!values || values.length === 0) {
                throw new Error(`API returned an empty embedding for input text at index ${index}: "${texts[index]}"`);
            }
            return values;
        });
    }
    async tryCompressChat(prompt_id, force = false) {
        const curatedHistory = this.getChat().getHistory(true);
        // Regardless of `force`, don't do anything if the history is empty.
        if (curatedHistory.length === 0) {
            return null;
        }
        const model = this.config.getModel();
        const { totalTokens: originalTokenCount } = await this.getContentGenerator().countTokens({
            model,
            contents: curatedHistory,
        });
        if (originalTokenCount === undefined) {
            console.warn(`Could not determine token count for model ${model}.`);
            return null;
        }
        // Don't compress if not forced and we are under the limit.
        if (!force &&
            originalTokenCount < this.COMPRESSION_TOKEN_THRESHOLD * tokenLimit(model)) {
            return null;
        }
        let compressBeforeIndex = findIndexAfterFraction(curatedHistory, 1 - this.COMPRESSION_PRESERVE_THRESHOLD);
        // Find the first user message after the index. This is the start of the next turn.
        while (compressBeforeIndex < curatedHistory.length &&
            (curatedHistory[compressBeforeIndex]?.role === 'model' ||
                isFunctionResponse(curatedHistory[compressBeforeIndex]))) {
            compressBeforeIndex++;
        }
        const historyToCompress = curatedHistory.slice(0, compressBeforeIndex);
        const historyToKeep = curatedHistory.slice(compressBeforeIndex);
        this.getChat().setHistory(historyToCompress);
        const { text: summary } = await this.getChat().sendMessage({
            message: {
                text: 'First, reason in your scratchpad. Then, generate the <state_snapshot>.',
            },
            config: {
                systemInstruction: { text: getCompressionPrompt() },
            },
        }, prompt_id);
        this.chat = await this.startChat([
            {
                role: 'user',
                parts: [{ text: summary }],
            },
            {
                role: 'model',
                parts: [{ text: 'Got it. Thanks for the additional context!' }],
            },
            ...historyToKeep,
        ]);
        const { totalTokens: newTokenCount } = await this.getContentGenerator().countTokens({
            // model might change after calling `sendMessage`, so we get the newest value from config
            model: this.config.getModel(),
            contents: this.getChat().getHistory(),
        });
        if (newTokenCount === undefined) {
            console.warn('Could not determine compressed history token count.');
            return null;
        }
        return {
            originalTokenCount,
            newTokenCount,
        };
    }
    /**
     * Handles falling back to Flash model when persistent 429 errors occur for OAuth users.
     * Uses a fallback handler if provided by the config; otherwise, returns null.
     */
    async handleFlashFallback(authType, error) {
        // Only handle fallback for OAuth users
        if (authType !== AuthType.LOGIN_WITH_GOOGLE) {
            return null;
        }
        const currentModel = this.config.getModel();
        const fallbackModel = DEFAULT_GEMINI_FLASH_MODEL;
        // Don't fallback if already using Flash model
        if (currentModel === fallbackModel) {
            return null;
        }
        // Check if config has a fallback handler (set by CLI package)
        const fallbackHandler = this.config.flashFallbackHandler;
        if (typeof fallbackHandler === 'function') {
            try {
                const accepted = await fallbackHandler(currentModel, fallbackModel, error);
                if (accepted !== false && accepted !== null) {
                    this.config.setModel(fallbackModel);
                    this.config.setFallbackMode(true);
                    return fallbackModel;
                }
                // Check if the model was switched manually in the handler
                if (this.config.getModel() === fallbackModel) {
                    return null; // Model was switched but don't continue with current prompt
                }
            }
            catch (error) {
                console.warn('Flash fallback handler failed:', error);
            }
        }
        return null;
    }
}
//# sourceMappingURL=client.js.map
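/*
 * Usage sketch (illustrative only, not part of the shipped module). It assumes
 * the host application already holds a `config` object implementing the
 * accessors used above (getProxy, getModel, getToolRegistry, ...), plus the
 * `contentGeneratorConfig` and `settings` values that `initialize()` expects;
 * none of those are constructed in this file.
 *
 *   import { GeminiClient } from './client.js';
 *   import { GeminiEventType } from './turn.js';
 *
 *   const client = new GeminiClient(config);
 *   await client.initialize(contentGeneratorConfig, settings);
 *
 *   const controller = new AbortController();
 *   const stream = client.sendMessageStream(
 *     [{ text: 'Summarize this repository.' }],
 *     controller.signal,
 *     'prompt-1', // prompt_id: any stable identifier for this user prompt
 *   );
 *   for await (const event of stream) {
 *     if (event.type === GeminiEventType.Content) {
 *       process.stdout.write(event.value);
 *     }
 *   }
 */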