UNPKG

quantum-cli-core

Version:

Quantum CLI Core - Multi-LLM Collaboration System

240 lines (237 loc) 10.9 kB
/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
import { SchemaValidator } from '../utils/schemaValidator.js';
import { BaseTool, ToolConfirmationOutcome, } from './tools.js';
import { getErrorMessage } from '../utils/errors.js';
import { ApprovalMode } from '../config/config.js';
import { getResponseText } from '../utils/generateContentResponseUtilities.js';
import { fetchWithTimeout, isPrivateIp } from '../utils/fetch.js';
import { convert } from 'html-to-text';

/** Timeout, in milliseconds, passed to `fetchWithTimeout` by the fallback fetch. */
const URL_FETCH_TIMEOUT_MS = 10000;
/** Maximum number of characters of converted page text kept for the fallback prompt. */
const MAX_CONTENT_LENGTH = 100000;

/** Characters that are treated as sentence punctuation when they end a matched URL. */
const TRAILING_PUNCTUATION = new Set(['.', ',', ';', ':', '!', '?', '"', "'"]);
/** Closing brackets mapped to their openers; a trailing closer is kept only if balanced. */
const BRACKET_PAIRS = new Map([
  [')', '('],
  [']', '['],
  ['}', '{'],
  ['>', '<'],
]);

/**
 * Counts how many times a single character occurs in a string.
 *
 * @param {string} text - String to scan.
 * @param {string} char - Single character to count.
 * @returns {number} Number of occurrences.
 */
function countChar(text, char) {
  return text.split(char).length - 1;
}

/**
 * Removes punctuation that belongs to the surrounding sentence rather than to
 * the URL: trailing `. , ; : ! ? " '` and closing brackets that have no
 * matching opener inside the URL itself.
 *
 * @param {string} url - Raw regex match.
 * @returns {string} The match without trailing sentence punctuation.
 */
function trimTrailingPunctuation(url) {
  let end = url.length;
  while (end > 0) {
    const candidate = url.slice(0, end);
    const last = candidate[end - 1];
    if (TRAILING_PUNCTUATION.has(last)) {
      end -= 1;
      continue;
    }
    const opener = BRACKET_PAIRS.get(last);
    // Keep balanced brackets such as ".../wiki/Foo_(bar)"; drop a stray closer.
    if (opener !== undefined && countChar(candidate, last) > countChar(candidate, opener)) {
      end -= 1;
      continue;
    }
    break;
  }
  return url.slice(0, end);
}

/**
 * Extracts every http:// or https:// URL from a string.
 *
 * Matches run up to the next whitespace character; trailing sentence
 * punctuation is then trimmed, and matches with nothing left after the scheme
 * are discarded.
 *
 * @param {string} text - Text to scan.
 * @returns {string[]} URLs in order of appearance (empty when none are found).
 */
function extractUrls(text) {
  const urlRegex = /(https?:\/\/[^\s]+)/g;
  return (text.match(urlRegex) || [])
    .map(trimTrailingPunctuation)
    .filter((url) => /^https?:\/\/./.test(url));
}

/**
 * Rewrites a GitHub "blob" page URL to the corresponding
 * raw.githubusercontent.com URL so the file content is fetched instead of the
 * HTML page. Any other URL (including one that cannot be parsed) is returned
 * unchanged.
 *
 * The host is compared exactly, so sub-domains such as gist.github.com and
 * look-alike hosts are not rewritten.
 *
 * @param {string} url - URL extracted from the prompt.
 * @returns {string} The raw-content URL, or `url` itself when no rewrite applies.
 */
function toRawGitHubUrl(url) {
  if (!url.includes('/blob/')) {
    return url;
  }
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return url;
  }
  const isGitHubHost = parsed.hostname === 'github.com' || parsed.hostname === 'www.github.com';
  if (!isGitHubHost || !parsed.pathname.includes('/blob/')) {
    return url;
  }
  parsed.hostname = 'raw.githubusercontent.com';
  parsed.pathname = parsed.pathname.replace('/blob/', '/');
  return parsed.href;
}

/**
 * Inserts citation markers such as "[1][3]" after each grounded segment.
 *
 * `segment.endIndex` is treated as a UTF-8 byte offset (as documented for the
 * Gemini API `Segment` type), so the insertion is done on the encoded bytes;
 * for ASCII-only text this is identical to inserting at character positions.
 * Supports without chunk indices or without a usable `endIndex` are skipped.
 *
 * @param {string} text - Response text to annotate.
 * @param {Array<object>} groundingSupports - Entries carrying `segment.endIndex`
 *   and `groundingChunkIndices`.
 * @returns {string} The text with markers inserted.
 */
function insertCitationMarkers(text, groundingSupports) {
  const insertions = [];
  for (const support of groundingSupports) {
    const endIndex = support.segment?.endIndex;
    if (!support.groundingChunkIndices || !Number.isInteger(endIndex) || endIndex < 0) {
      continue;
    }
    const marker = support.groundingChunkIndices
      .map((chunkIndex) => `[${chunkIndex + 1}]`)
      .join('');
    insertions.push({ index: endIndex, marker });
  }
  // Work from the end of the text backwards so earlier offsets stay valid.
  insertions.sort((a, b) => b.index - a.index);
  const bytes = Buffer.from(text, 'utf8');
  const pieces = [];
  let tail = bytes.length;
  for (const { index, marker } of insertions) {
    let at = Math.min(index, tail);
    // Never split a multi-byte character: advance past UTF-8 continuation bytes.
    while (at < tail && (bytes[at] & 0xc0) === 0x80) {
      at += 1;
    }
    pieces.unshift(Buffer.from(marker, 'utf8'), bytes.subarray(at, tail));
    tail = at;
  }
  pieces.unshift(bytes.subarray(0, tail));
  return Buffer.concat(pieces).toString('utf8');
}

/**
 * Implementation of the WebFetch tool logic.
 *
 * The tool sends the user's prompt to the Gemini client with the `urlContext`
 * tool enabled. When the first URL is reported private by `isPrivateIp`, or
 * when the model response carries no usable content, it falls back to fetching
 * the first URL directly and asking the model to answer from the page text.
 */
export class WebFetchTool extends BaseTool {
  config;
  static Name = 'web_fetch';

  /**
   * @param {object} config - Object providing `getGeminiClient()`,
   *   `getApprovalMode()` and `setApprovalMode()`.
   */
  constructor(config) {
    super(WebFetchTool.Name, 'WebFetch', "Processes content from URL(s), including local and private network addresses (e.g., localhost), embedded in a prompt. Include up to 20 URLs and instructions (e.g., summarize, extract specific data) directly in the 'prompt' parameter.", {
      properties: {
        prompt: {
          description: 'A comprehensive prompt that includes the URL(s) (up to 20) to fetch and specific instructions on how to process their content (e.g., "Summarize https://example.com/article and extract key points from https://another.com/data"). Must contain as least one URL starting with http:// or https://.',
          type: 'string',
        },
      },
      required: ['prompt'],
      type: 'object',
    });
    this.config = config;
  }

  /**
   * Fetches the first URL in the prompt directly, converts the body to plain
   * text and asks the Gemini client to answer the prompt from that text.
   *
   * Failures (including a non-OK HTTP status) are reported in the returned
   * object rather than thrown.
   *
   * @param {{prompt: string}} params - Tool parameters.
   * @param {AbortSignal} signal - Passed through to `generateContent`.
   * @returns {Promise<{llmContent: string, returnDisplay: string}>} Tool result.
   */
  async executeFallback(params, signal) {
    const urls = extractUrls(params.prompt);
    if (urls.length === 0) {
      return {
        llmContent: 'Error: No URL found in the prompt for fallback.',
        returnDisplay: 'Error: No URL found in the prompt for fallback.',
      };
    }
    // For now, we only support one URL for fallback.
    // GitHub blob pages are fetched through their raw-content URL.
    const url = toRawGitHubUrl(urls[0]);
    try {
      const response = await fetchWithTimeout(url, URL_FETCH_TIMEOUT_MS);
      if (!response.ok) {
        throw new Error(`Request failed with status code ${response.status} ${response.statusText}`);
      }
      const html = await response.text();
      // Drop link targets and images, then cap the text length for the prompt.
      const textContent = convert(html, {
        wordwrap: false,
        selectors: [
          { selector: 'a', options: { ignoreHref: true } },
          { selector: 'img', format: 'skip' },
        ],
      }).substring(0, MAX_CONTENT_LENGTH);
      const geminiClient = this.config.getGeminiClient();
      const fallbackPrompt = `The user requested the following: "${params.prompt}". I was unable to access the URL directly. Instead, I have fetched the raw content of the page. Please use the following content to answer the user's request. Do not attempt to access the URL again. --- ${textContent} ---`;
      const result = await geminiClient.generateContent([{ role: 'user', parts: [{ text: fallbackPrompt }] }], {}, signal);
      const resultText = getResponseText(result) || '';
      return {
        llmContent: resultText,
        returnDisplay: `Content for ${url} processed using fallback fetch.`,
      };
    } catch (e) {
      // Thrown values are not guaranteed to be Error instances, so use the
      // shared helper (as execute() does) instead of reading `.message`.
      const errorMessage = `Error during fallback fetch for ${url}: ${getErrorMessage(e)}`;
      return {
        llmContent: `Error: ${errorMessage}`,
        returnDisplay: `Error: ${errorMessage}`,
      };
    }
  }

  /**
   * Validates the tool parameters.
   *
   * @param {{prompt: string}} params - Tool parameters.
   * @returns {string | null} A human-readable reason when invalid, otherwise `null`.
   */
  validateParams(params) {
    if (this.schema.parameters &&
      !SchemaValidator.validate(this.schema.parameters, params)) {
      return 'Parameters failed schema validation.';
    }
    if (!params.prompt || params.prompt.trim() === '') {
      return "The 'prompt' parameter cannot be empty and must contain URL(s) and instructions.";
    }
    // Use the same extractor as execute() so a bare "http://" with nothing
    // after it is rejected here instead of yielding an empty URL list later.
    if (extractUrls(params.prompt).length === 0) {
      return "The 'prompt' must contain at least one valid URL (starting with http:// or https://).";
    }
    return null;
  }

  /**
   * Builds a short description of the call, truncating the prompt to at most
   * 100 characters.
   *
   * @param {{prompt: string}} params - Tool parameters.
   * @returns {string} Description text.
   */
  getDescription(params) {
    const displayPrompt = params.prompt.length > 100
      ? params.prompt.substring(0, 97) + '...'
      : params.prompt;
    return `Processing URLs and instructions from prompt: "${displayPrompt}"`;
  }

  /**
   * Decides whether the call needs user confirmation.
   *
   * @param {{prompt: string}} params - Tool parameters.
   * @returns {Promise<false | object>} `false` when the approval mode is
   *   AUTO_EDIT or the parameters are invalid; otherwise the confirmation
   *   details to present.
   */
  async shouldConfirmExecute(params) {
    if (this.config.getApprovalMode() === ApprovalMode.AUTO_EDIT) {
      return false;
    }
    const validationError = this.validateParams(params);
    if (validationError) {
      return false;
    }
    // Perform GitHub URL conversion here to differentiate between the
    // user-provided URL and the actual URL to be fetched.
    const urls = extractUrls(params.prompt).map(toRawGitHubUrl);
    const confirmationDetails = {
      type: 'info',
      title: `Confirm Web Fetch`,
      prompt: params.prompt,
      urls,
      onConfirm: async (outcome) => {
        if (outcome === ToolConfirmationOutcome.ProceedAlways) {
          this.config.setApprovalMode(ApprovalMode.AUTO_EDIT);
        }
      },
    };
    return confirmationDetails;
  }

  /**
   * Runs the tool: validates the parameters, then either delegates to the
   * fallback fetch or asks the Gemini client to process the prompt with the
   * `urlContext` tool, appending citation markers and a source list when
   * grounding metadata is present.
   *
   * Errors from the Gemini call are reported in the returned object rather
   * than thrown.
   *
   * @param {{prompt: string}} params - Tool parameters.
   * @param {AbortSignal} signal - Passed through to `generateContent`.
   * @returns {Promise<{llmContent: string, returnDisplay: string}>} Tool result.
   */
  async execute(params, signal) {
    const validationError = this.validateParams(params);
    if (validationError) {
      return {
        llmContent: `Error: Invalid parameters provided. Reason: ${validationError}`,
        returnDisplay: validationError,
      };
    }
    const userPrompt = params.prompt;
    // Only the first URL decides whether the fallback path is taken.
    const [url] = extractUrls(userPrompt);
    if (isPrivateIp(url)) {
      return this.executeFallback(params, signal);
    }
    const geminiClient = this.config.getGeminiClient();
    try {
      const response = await geminiClient.generateContent([{ role: 'user', parts: [{ text: userPrompt }] }], { tools: [{ urlContext: {} }] }, signal);
      console.debug(`[WebFetchTool] Full response for prompt "${userPrompt.substring(0, 50)}...":`, JSON.stringify(response, null, 2));
      let responseText = getResponseText(response) || '';
      const candidate = response.candidates?.[0];
      const urlMetadata = candidate?.urlContextMetadata?.urlMetadata;
      const groundingMetadata = candidate?.groundingMetadata;
      const sources = groundingMetadata?.groundingChunks;
      const groundingSupports = groundingMetadata?.groundingSupports;

      // Fall back when every reported URL retrieval failed, or when the
      // response carries neither text nor grounding sources.
      const allRetrievalsFailed = Boolean(urlMetadata) &&
        urlMetadata.length > 0 &&
        urlMetadata.every((m) => m.urlRetrievalStatus !== 'URL_RETRIEVAL_STATUS_SUCCESS');
      const hasNoUsableContent = !responseText.trim() && (!sources || sources.length === 0);
      if (allRetrievalsFailed || hasNoUsableContent) {
        return this.executeFallback(params, signal);
      }

      if (sources && sources.length > 0) {
        const sourceListFormatted = sources.map((source, index) => {
          const title = source.web?.title || 'Untitled';
          const uri = source.web?.uri || 'Unknown URI'; // Fallback if URI is missing
          return `[${index + 1}] ${title} (${uri})`;
        });
        if (groundingSupports && groundingSupports.length > 0) {
          responseText = insertCitationMarkers(responseText, groundingSupports);
        }
        responseText += ` Sources: ${sourceListFormatted.join('\n')}`;
      }
      const llmContent = responseText;
      console.debug(`[WebFetchTool] Formatted tool response for prompt "${userPrompt}:\n\n":`, llmContent);
      return {
        llmContent,
        returnDisplay: `Content processed from prompt.`,
      };
    } catch (error) {
      const errorMessage = `Error processing web content for prompt "${userPrompt.substring(0, 50)}...": ${getErrorMessage(error)}`;
      console.error(errorMessage, error);
      return {
        llmContent: `Error: ${errorMessage}`,
        returnDisplay: `Error: ${errorMessage}`,
      };
    }
  }
}
//# sourceMappingURL=web-fetch.js.map